Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/acl.c | 5
-rw-r--r--  fs/9p/acl.h | 2
-rw-r--r--  fs/9p/vfs_dentry.c | 4
-rw-r--r--  fs/9p/vfs_inode.c | 39
-rw-r--r--  fs/adfs/dir.c | 13
-rw-r--r--  fs/adfs/super.c | 11
-rw-r--r--  fs/affs/amigaffs.c | 4
-rw-r--r--  fs/affs/namei.c | 68
-rw-r--r--  fs/affs/super.c | 11
-rw-r--r--  fs/afs/dir.c | 10
-rw-r--r--  fs/afs/internal.h | 2
-rw-r--r--  fs/afs/security.c | 7
-rw-r--r--  fs/afs/super.c | 10
-rw-r--r--  fs/anon_inodes.c | 6
-rw-r--r--  fs/autofs4/autofs_i.h | 21
-rw-r--r--  fs/autofs4/expire.c | 141
-rw-r--r--  fs/autofs4/inode.c | 2
-rw-r--r--  fs/autofs4/root.c | 103
-rw-r--r--  fs/autofs4/waitq.c | 23
-rw-r--r--  fs/bad_inode.c | 5
-rw-r--r--  fs/befs/linuxvfs.c | 10
-rw-r--r--  fs/bfs/inode.c | 9
-rw-r--r--  fs/bio.c | 23
-rw-r--r--  fs/block_dev.c | 10
-rw-r--r--  fs/btrfs/acl.c | 21
-rw-r--r--  fs/btrfs/compression.c | 15
-rw-r--r--  fs/btrfs/ctree.h | 8
-rw-r--r--  fs/btrfs/disk-io.c | 41
-rw-r--r--  fs/btrfs/export.c | 82
-rw-r--r--  fs/btrfs/extent-tree.c | 77
-rw-r--r--  fs/btrfs/extent_io.c | 77
-rw-r--r--  fs/btrfs/extent_io.h | 3
-rw-r--r--  fs/btrfs/file.c | 99
-rw-r--r--  fs/btrfs/free-space-cache.c | 12
-rw-r--r--  fs/btrfs/inode.c | 316
-rw-r--r--  fs/btrfs/ioctl.c | 87
-rw-r--r--  fs/btrfs/ioctl.h | 14
-rw-r--r--  fs/btrfs/ordered-data.c | 67
-rw-r--r--  fs/btrfs/ordered-data.h | 3
-rw-r--r--  fs/btrfs/orphan.c | 6
-rw-r--r--  fs/btrfs/super.c | 43
-rw-r--r--  fs/btrfs/transaction.c | 5
-rw-r--r--  fs/btrfs/tree-log.c | 21
-rw-r--r--  fs/btrfs/volumes.c | 20
-rw-r--r--  fs/btrfs/volumes.h | 2
-rw-r--r--  fs/buffer.c | 37
-rw-r--r--  fs/ceph/addr.c | 6
-rw-r--r--  fs/ceph/caps.c | 17
-rw-r--r--  fs/ceph/dir.c | 51
-rw-r--r--  fs/ceph/file.c | 65
-rw-r--r--  fs/ceph/inode.c | 88
-rw-r--r--  fs/ceph/ioctl.h | 2
-rw-r--r--  fs/ceph/locks.c | 94
-rw-r--r--  fs/ceph/mds_client.c | 51
-rw-r--r--  fs/ceph/mds_client.h | 33
-rw-r--r--  fs/ceph/super.h | 6
-rw-r--r--  fs/cifs/Kconfig | 8
-rw-r--r--  fs/cifs/Makefile | 4
-rw-r--r--  fs/cifs/README | 9
-rw-r--r--  fs/cifs/TODO | 2
-rw-r--r--  fs/cifs/cifs_fs_sb.h | 7
-rw-r--r--  fs/cifs/cifsacl.c | 51
-rw-r--r--  fs/cifs/cifsacl.h | 4
-rw-r--r--  fs/cifs/cifsfs.c | 26
-rw-r--r--  fs/cifs/cifsglob.h | 15
-rw-r--r--  fs/cifs/cifsproto.h | 12
-rw-r--r--  fs/cifs/cifssmb.c | 183
-rw-r--r--  fs/cifs/connect.c | 242
-rw-r--r--  fs/cifs/dir.c | 77
-rw-r--r--  fs/cifs/dns_resolve.c | 2
-rw-r--r--  fs/cifs/file.c | 78
-rw-r--r--  fs/cifs/fscache.c | 12
-rw-r--r--  fs/cifs/inode.c | 72
-rw-r--r--  fs/cifs/ioctl.c | 16
-rw-r--r--  fs/cifs/link.c | 4
-rw-r--r--  fs/cifs/misc.c | 25
-rw-r--r--  fs/cifs/readdir.c | 47
-rw-r--r--  fs/cifs/xattr.c | 55
-rw-r--r--  fs/coda/cache.c | 4
-rw-r--r--  fs/coda/dir.c | 20
-rw-r--r--  fs/coda/inode.c | 9
-rw-r--r--  fs/coda/pioctl.c | 6
-rw-r--r--  fs/compat.c | 28
-rw-r--r--  fs/compat_ioctl.c | 4
-rw-r--r--  fs/configfs/configfs_internal.h | 4
-rw-r--r--  fs/configfs/dir.c | 24
-rw-r--r--  fs/configfs/inode.c | 8
-rw-r--r--  fs/dcache.c | 1375
-rw-r--r--  fs/ecryptfs/dentry.c | 9
-rw-r--r--  fs/ecryptfs/inode.c | 12
-rw-r--r--  fs/ecryptfs/main.c | 4
-rw-r--r--  fs/ecryptfs/super.c | 13
-rw-r--r--  fs/efs/super.c | 9
-rw-r--r--  fs/exec.c | 41
-rw-r--r--  fs/exofs/super.c | 9
-rw-r--r--  fs/exportfs/expfs.c | 14
-rw-r--r--  fs/ext2/acl.c | 11
-rw-r--r--  fs/ext2/acl.h | 2
-rw-r--r--  fs/ext2/super.c | 9
-rw-r--r--  fs/ext3/acl.c | 11
-rw-r--r--  fs/ext3/acl.h | 2
-rw-r--r--  fs/ext3/super.c | 10
-rw-r--r--  fs/ext4/acl.c | 11
-rw-r--r--  fs/ext4/acl.h | 2
-rw-r--r--  fs/ext4/ext4.h | 5
-rw-r--r--  fs/ext4/inode.c | 8
-rw-r--r--  fs/ext4/ioctl.c | 24
-rw-r--r--  fs/ext4/mballoc.c | 2
-rw-r--r--  fs/ext4/namei.c | 2
-rw-r--r--  fs/ext4/page-io.c | 99
-rw-r--r--  fs/ext4/resize.c | 5
-rw-r--r--  fs/ext4/super.c | 134
-rw-r--r--  fs/fat/inode.c | 13
-rw-r--r--  fs/fat/namei_msdos.c | 23
-rw-r--r--  fs/fat/namei_vfat.c | 55
-rw-r--r--  fs/filesystems.c | 3
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 9
-rw-r--r--  fs/fs_struct.c | 36
-rw-r--r--  fs/fuse/dir.c | 18
-rw-r--r--  fs/fuse/file.c | 82
-rw-r--r--  fs/fuse/inode.c | 13
-rw-r--r--  fs/generic_acl.c | 20
-rw-r--r--  fs/gfs2/acl.c | 5
-rw-r--r--  fs/gfs2/acl.h | 2
-rw-r--r--  fs/gfs2/bmap.c | 11
-rw-r--r--  fs/gfs2/dentry.c | 22
-rw-r--r--  fs/gfs2/export.c | 50
-rw-r--r--  fs/gfs2/file.c | 2
-rw-r--r--  fs/gfs2/glock.c | 92
-rw-r--r--  fs/gfs2/glock.h | 28
-rw-r--r--  fs/gfs2/glops.c | 1
-rw-r--r--  fs/gfs2/incore.h | 12
-rw-r--r--  fs/gfs2/inode.c | 165
-rw-r--r--  fs/gfs2/inode.h | 6
-rw-r--r--  fs/gfs2/lock_dlm.c | 15
-rw-r--r--  fs/gfs2/ops_fstype.c | 2
-rw-r--r--  fs/gfs2/ops_inode.c | 38
-rw-r--r--  fs/gfs2/quota.c | 28
-rw-r--r--  fs/gfs2/rgrp.c | 146
-rw-r--r--  fs/gfs2/rgrp.h | 1
-rw-r--r--  fs/gfs2/super.c | 9
-rw-r--r--  fs/gfs2/xattr.c | 23
-rw-r--r--  fs/hfs/dir.c | 2
-rw-r--r--  fs/hfs/hfs_fs.h | 8
-rw-r--r--  fs/hfs/string.c | 17
-rw-r--r--  fs/hfs/super.c | 11
-rw-r--r--  fs/hfs/sysdep.c | 7
-rw-r--r--  fs/hfsplus/dir.c | 2
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 9
-rw-r--r--  fs/hfsplus/super.c | 12
-rw-r--r--  fs/hfsplus/unicode.c | 19
-rw-r--r--  fs/hostfs/hostfs_kern.c | 44
-rw-r--r--  fs/hpfs/dentry.c | 27
-rw-r--r--  fs/hpfs/namei.c | 2
-rw-r--r--  fs/hpfs/super.c | 9
-rw-r--r--  fs/hppfs/hppfs.c | 9
-rw-r--r--  fs/hugetlbfs/inode.c | 12
-rw-r--r--  fs/inode.c | 50
-rw-r--r--  fs/internal.h | 1
-rw-r--r--  fs/ioctl.c | 40
-rw-r--r--  fs/ioprio.c | 13
-rw-r--r--  fs/isofs/inode.c | 131
-rw-r--r--  fs/isofs/namei.c | 5
-rw-r--r--  fs/jbd2/journal.c | 16
-rw-r--r--  fs/jffs2/acl.c | 5
-rw-r--r--  fs/jffs2/acl.h | 2
-rw-r--r--  fs/jffs2/super.c | 9
-rw-r--r--  fs/jfs/acl.c | 8
-rw-r--r--  fs/jfs/jfs_acl.h | 2
-rw-r--r--  fs/jfs/namei.c | 63
-rw-r--r--  fs/jfs/super.c | 12
-rw-r--r--  fs/libfs.c | 63
-rw-r--r--  fs/lockd/clntlock.c | 1
-rw-r--r--  fs/lockd/clntproc.c | 1
-rw-r--r--  fs/lockd/host.c | 11
-rw-r--r--  fs/lockd/svc4proc.c | 1
-rw-r--r--  fs/lockd/svclock.c | 1
-rw-r--r--  fs/lockd/svcproc.c | 1
-rw-r--r--  fs/locks.c | 22
-rw-r--r--  fs/logfs/dir.c | 6
-rw-r--r--  fs/logfs/inode.c | 9
-rw-r--r--  fs/logfs/journal.c | 2
-rw-r--r--  fs/logfs/readwrite.c | 3
-rw-r--r--  fs/minix/inode.c | 9
-rw-r--r--  fs/minix/namei.c | 2
-rw-r--r--  fs/namei.c | 860
-rw-r--r--  fs/namespace.c | 292
-rw-r--r--  fs/ncpfs/dir.c | 89
-rw-r--r--  fs/ncpfs/file.c | 1
-rw-r--r--  fs/ncpfs/inode.c | 20
-rw-r--r--  fs/ncpfs/ioctl.c | 1
-rw-r--r--  fs/ncpfs/ncplib_kernel.h | 16
-rw-r--r--  fs/nfs/callback.c | 1
-rw-r--r--  fs/nfs/delegation.c | 1
-rw-r--r--  fs/nfs/dir.c | 252
-rw-r--r--  fs/nfs/direct.c | 2
-rw-r--r--  fs/nfs/file.c | 2
-rw-r--r--  fs/nfs/getroot.c | 10
-rw-r--r--  fs/nfs/inode.c | 10
-rw-r--r--  fs/nfs/internal.h | 9
-rw-r--r--  fs/nfs/mount_clnt.c | 4
-rw-r--r--  fs/nfs/namespace.c | 17
-rw-r--r--  fs/nfs/nfs2xdr.c | 8
-rw-r--r--  fs/nfs/nfs3xdr.c | 8
-rw-r--r--  fs/nfs/nfs4proc.c | 13
-rw-r--r--  fs/nfs/nfs4xdr.c | 8
-rw-r--r--  fs/nfs/pagelist.c | 4
-rw-r--r--  fs/nfs/read.c | 1
-rw-r--r--  fs/nfs/super.c | 13
-rw-r--r--  fs/nfs/unlink.c | 2
-rw-r--r--  fs/nfs/write.c | 3
-rw-r--r--  fs/nfsd/nfs3xdr.c | 6
-rw-r--r--  fs/nfsd/nfs4state.c | 26
-rw-r--r--  fs/nfsd/vfs.c | 5
-rw-r--r--  fs/nfsd/xdr4.h | 21
-rw-r--r--  fs/nilfs2/dat.c | 2
-rw-r--r--  fs/nilfs2/gcinode.c | 9
-rw-r--r--  fs/nilfs2/inode.c | 10
-rw-r--r--  fs/nilfs2/ioctl.c | 16
-rw-r--r--  fs/nilfs2/nilfs.h | 2
-rw-r--r--  fs/nilfs2/super.c | 12
-rw-r--r--  fs/notify/fanotify/fanotify.c | 6
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 81
-rw-r--r--  fs/notify/fsnotify.c | 8
-rw-r--r--  fs/notify/inotify/inotify_user.c | 1
-rw-r--r--  fs/ntfs/inode.c | 9
-rw-r--r--  fs/ocfs2/acl.c | 8
-rw-r--r--  fs/ocfs2/acl.h | 2
-rw-r--r--  fs/ocfs2/aops.c | 7
-rw-r--r--  fs/ocfs2/aops.h | 23
-rw-r--r--  fs/ocfs2/cluster/heartbeat.c | 17
-rw-r--r--  fs/ocfs2/cluster/masklog.c | 3
-rw-r--r--  fs/ocfs2/cluster/masklog.h | 15
-rw-r--r--  fs/ocfs2/cluster/quorum.c | 4
-rw-r--r--  fs/ocfs2/dcache.c | 21
-rw-r--r--  fs/ocfs2/dir.c | 4
-rw-r--r--  fs/ocfs2/dlm/dlmdomain.c | 2
-rw-r--r--  fs/ocfs2/dlm/dlmmaster.c | 40
-rw-r--r--  fs/ocfs2/dlmfs/dlmfs.c | 9
-rw-r--r--  fs/ocfs2/export.c | 4
-rw-r--r--  fs/ocfs2/file.c | 22
-rw-r--r--  fs/ocfs2/file.h | 2
-rw-r--r--  fs/ocfs2/namei.c | 10
-rw-r--r--  fs/ocfs2/ocfs2.h | 6
-rw-r--r--  fs/ocfs2/ocfs2_fs.h | 2
-rw-r--r--  fs/ocfs2/stack_user.c | 2
-rw-r--r--  fs/ocfs2/super.c | 10
-rw-r--r--  fs/openpromfs/inode.c | 11
-rw-r--r--  fs/pipe.c | 26
-rw-r--r--  fs/pnode.c | 4
-rw-r--r--  fs/proc/Makefile | 1
-rw-r--r--  fs/proc/base.c | 134
-rw-r--r--  fs/proc/generic.c | 4
-rw-r--r--  fs/proc/inode.c | 10
-rw-r--r--  fs/proc/proc_console.c | 114
-rw-r--r--  fs/proc/proc_sysctl.c | 31
-rw-r--r--  fs/proc/task_mmu.c | 3
-rw-r--r--  fs/proc/vmcore.c | 2
-rw-r--r--  fs/qnx4/inode.c | 9
-rw-r--r--  fs/read_write.c | 1
-rw-r--r--  fs/reiserfs/inode.c | 1
-rw-r--r--  fs/reiserfs/ioctl.c | 8
-rw-r--r--  fs/reiserfs/journal.c | 1
-rw-r--r--  fs/reiserfs/super.c | 10
-rw-r--r--  fs/reiserfs/xattr.c | 18
-rw-r--r--  fs/reiserfs/xattr_acl.c | 6
-rw-r--r--  fs/romfs/super.c | 9
-rw-r--r--  fs/splice.c | 24
-rw-r--r--  fs/squashfs/super.c | 9
-rw-r--r--  fs/super.c | 5
-rw-r--r--  fs/sysfs/dir.c | 10
-rw-r--r--  fs/sysfs/inode.c | 11
-rw-r--r--  fs/sysfs/sysfs.h | 2
-rw-r--r--  fs/sysv/inode.c | 9
-rw-r--r--  fs/sysv/namei.c | 5
-rw-r--r--  fs/sysv/super.c | 2
-rw-r--r--  fs/ubifs/super.c | 10
-rw-r--r--  fs/udf/super.c | 9
-rw-r--r--  fs/ufs/super.c | 9
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 11
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 101
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 37
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 1
-rw-r--r--  fs/xfs/xfs_acl.h | 2
-rw-r--r--  fs/xfs/xfs_bmap.c | 85
-rw-r--r--  fs/xfs/xfs_bmap.h | 5
-rw-r--r--  fs/xfs/xfs_dfrag.c | 13
-rw-r--r--  fs/xfs/xfs_error.c | 3
-rw-r--r--  fs/xfs/xfs_error.h | 5
-rw-r--r--  fs/xfs/xfs_filestream.c | 8
-rw-r--r--  fs/xfs/xfs_iget.c | 13
-rw-r--r--  fs/xfs/xfs_inode_item.c | 31
-rw-r--r--  fs/xfs/xfs_mount.c | 1
-rw-r--r--  fs/xfs/xfs_mru_cache.c | 2
-rw-r--r--  fs/xfs/xfs_quota.h | 20
-rw-r--r--  fs/xfs/xfs_rename.c | 1
299 files changed, 6288 insertions, 3139 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 12d602351dbe..6e58c4ca1e6e 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -91,11 +91,14 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
 	return acl;
 }
 
-int v9fs_check_acl(struct inode *inode, int mask)
+int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
 {
 	struct posix_acl *acl;
 	struct v9fs_session_info *v9ses;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	v9ses = v9fs_inode2v9ses(inode);
 	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
 		/*
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 59e18c2e8c7e..7ef3ac9f6d95 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
 
 #ifdef CONFIG_9P_FS_POSIX_ACL
 extern int v9fs_get_acl(struct inode *, struct p9_fid *);
-extern int v9fs_check_acl(struct inode *inode, int mask);
+extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
 extern int v9fs_acl_chmod(struct dentry *);
 extern int v9fs_set_create_acl(struct dentry *,
 			struct posix_acl *, struct posix_acl *);
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index cbf4e50f3933..466d2a4fc5cb 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -51,7 +51,7 @@
  *
  */
 
-static int v9fs_dentry_delete(struct dentry *dentry)
+static int v9fs_dentry_delete(const struct dentry *dentry)
 {
 	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
 		   dentry);
@@ -68,7 +68,7 @@ static int v9fs_dentry_delete(struct dentry *dentry)
  *
  */
 
-static int v9fs_cached_dentry_delete(struct dentry *dentry)
+static int v9fs_cached_dentry_delete(const struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 34bf71b56542..59782981b225 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -237,10 +237,17 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
  *
  */
 
-void v9fs_destroy_inode(struct inode *inode)
+static void v9fs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode));
 }
+
+void v9fs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, v9fs_i_callback);
+}
 #endif
 
 /**
@@ -270,11 +277,11 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
 {
 	struct dentry *dentry;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&inode->i_lock);
 	/* Directory should have only one entry. */
 	BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
 	dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&inode->i_lock);
 	return dentry;
 }
 
@@ -628,9 +635,9 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	}
 
 	if (v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	else
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
@@ -742,7 +749,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err);
 		goto error;
 	}
-	dentry->d_op = &v9fs_cached_dentry_operations;
+	d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
@@ -760,7 +767,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
@@ -949,7 +956,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 			err);
 		goto error;
 	}
-	dentry->d_op = &v9fs_cached_dentry_operations;
+	d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
@@ -966,7 +973,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
@@ -1034,9 +1041,9 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 inst_out:
 	if (v9ses->cache)
-		dentry->d_op = &v9fs_cached_dentry_operations;
+		d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	else
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 
 	d_add(dentry, inode);
 	return NULL;
@@ -1702,7 +1709,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 			err);
 		goto error;
 	}
-	dentry->d_op = &v9fs_cached_dentry_operations;
+	d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
@@ -1715,7 +1722,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 
@@ -1849,7 +1856,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 		ihold(old_dentry->d_inode);
 	}
 
-	dentry->d_op = old_dentry->d_op;
+	d_set_d_op(dentry, old_dentry->d_op);
 	d_instantiate(dentry, old_dentry->d_inode);
 
 	return err;
@@ -1973,7 +1980,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err);
 		goto error;
 	}
-	dentry->d_op = &v9fs_cached_dentry_operations;
+	d_set_d_op(dentry, &v9fs_cached_dentry_operations);
 	d_instantiate(dentry, inode);
 	err = v9fs_fid_add(dentry, fid);
 	if (err < 0)
@@ -1989,7 +1996,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 			err = PTR_ERR(inode);
 			goto error;
 		}
-		dentry->d_op = &v9fs_dentry_operations;
+		d_set_d_op(dentry, &v9fs_dentry_operations);
 		d_instantiate(dentry, inode);
 	}
 	/* Now set the ACL based on the default value */
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index f4287e4de744..bf7693c384f9 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -201,7 +201,8 @@ const struct file_operations adfs_dir_operations = {
 };
 
 static int
-adfs_hash(struct dentry *parent, struct qstr *qstr)
+adfs_hash(const struct dentry *parent, const struct inode *inode,
+		struct qstr *qstr)
 {
 	const unsigned int name_len = ADFS_SB(parent->d_sb)->s_namelen;
 	const unsigned char *name;
@@ -237,17 +238,19 @@ adfs_hash(struct dentry *parent, struct qstr *qstr)
  * requirements of the underlying filesystem.
  */
 static int
-adfs_compare(struct dentry *parent, struct qstr *entry, struct qstr *name)
+adfs_compare(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
 	int i;
 
-	if (entry->len != name->len)
+	if (len != name->len)
 		return 1;
 
 	for (i = 0; i < name->len; i++) {
 		char a, b;
 
-		a = entry->name[i];
+		a = str[i];
 		b = name->name[i];
 
 		if (a >= 'A' && a <= 'Z')
@@ -273,7 +276,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	struct object_info obj;
 	int error;
 
-	dentry->d_op = &adfs_dentry_operations;
+	d_set_d_op(dentry, &adfs_dentry_operations);
 	lock_kernel();
 	error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
 	if (error == 0) {
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 959dbff2d42d..a4041b52fbca 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -240,11 +240,18 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
-static void adfs_destroy_inode(struct inode *inode)
+static void adfs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
 }
 
+static void adfs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, adfs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
@@ -477,7 +484,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
 		adfs_error(sb, "get root inode failed\n");
 		goto error;
 	} else
-		sb->s_root->d_op = &adfs_dentry_operations;
+		d_set_d_op(sb->s_root, &adfs_dentry_operations);
 	unlock_kernel();
 	return 0;
 
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 7d0f0a30f7a3..3a4557e8325c 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -128,7 +128,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 	void *data = dentry->d_fsdata;
 	struct list_head *head, *next;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&inode->i_lock);
 	head = &inode->i_dentry;
 	next = head->next;
 	while (next != head) {
@@ -139,7 +139,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
 		}
 		next = next->next;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&inode->i_lock);
 }
 
 
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 914d1c0bc07a..944a4042fb65 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -13,11 +13,19 @@
 typedef int (*toupper_t)(int);
 
 static int affs_toupper(int ch);
-static int affs_hash_dentry(struct dentry *, struct qstr *);
-static int affs_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int affs_hash_dentry(const struct dentry *,
+		const struct inode *, struct qstr *);
+static int affs_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 static int affs_intl_toupper(int ch);
-static int affs_intl_hash_dentry(struct dentry *, struct qstr *);
-static int affs_intl_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
+static int affs_intl_hash_dentry(const struct dentry *,
+		const struct inode *, struct qstr *);
+static int affs_intl_compare_dentry(const struct dentry *parent,
+		const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name);
 
 const struct dentry_operations affs_dentry_operations = {
 	.d_hash		= affs_hash_dentry,
@@ -58,13 +66,13 @@ affs_get_toupper(struct super_block *sb)
  * Note: the dentry argument is the parent dentry.
  */
 static inline int
-__affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
+__affs_hash_dentry(struct qstr *qstr, toupper_t toupper)
 {
 	const u8 *name = qstr->name;
 	unsigned long hash;
 	int i;
 
-	i = affs_check_name(qstr->name,qstr->len);
+	i = affs_check_name(qstr->name, qstr->len);
 	if (i)
 		return i;
 
@@ -78,39 +86,41 @@ __affs_hash_dentry(struct dentry *dentry, struct qstr *qstr, toupper_t toupper)
 }
 
 static int
-affs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
-	return __affs_hash_dentry(dentry, qstr, affs_toupper);
+	return __affs_hash_dentry(qstr, affs_toupper);
 }
 static int
-affs_intl_hash_dentry(struct dentry *dentry, struct qstr *qstr)
+affs_intl_hash_dentry(const struct dentry *dentry, const struct inode *inode,
+		struct qstr *qstr)
 {
-	return __affs_hash_dentry(dentry, qstr, affs_intl_toupper);
+	return __affs_hash_dentry(qstr, affs_intl_toupper);
 }
 
-static inline int
-__affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, toupper_t toupper)
+static inline int __affs_compare_dentry(unsigned int len,
+		const char *str, const struct qstr *name, toupper_t toupper)
 {
-	const u8 *aname = a->name;
-	const u8 *bname = b->name;
-	int len;
+	const u8 *aname = str;
+	const u8 *bname = name->name;
 
-	/* 'a' is the qstr of an already existing dentry, so the name
-	 * must be valid. 'b' must be validated first.
+	/*
+	 * 'str' is the name of an already existing dentry, so the name
+	 * must be valid. 'name' must be validated first.
 	 */
 
-	if (affs_check_name(b->name,b->len))
+	if (affs_check_name(name->name, name->len))
 		return 1;
 
-	/* If the names are longer than the allowed 30 chars,
+	/*
+	 * If the names are longer than the allowed 30 chars,
 	 * the excess is ignored, so their length may differ.
 	 */
-	len = a->len;
 	if (len >= 30) {
-		if (b->len < 30)
+		if (name->len < 30)
 			return 1;
 		len = 30;
-	} else if (len != b->len)
+	} else if (len != name->len)
 		return 1;
 
 	for (; len > 0; len--)
@@ -121,14 +131,18 @@ __affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b, tou
 }
 
 static int
-affs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(dentry, a, b, affs_toupper);
+	return __affs_compare_dentry(len, str, name, affs_toupper);
 }
 static int
-affs_intl_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
+affs_intl_compare_dentry(const struct dentry *parent,const struct inode *pinode,
+		const struct dentry *dentry, const struct inode *inode,
+		unsigned int len, const char *str, const struct qstr *name)
 {
-	return __affs_compare_dentry(dentry, a, b, affs_intl_toupper);
+	return __affs_compare_dentry(len, str, name, affs_intl_toupper);
 }
 
 /*
@@ -226,7 +240,7 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		if (IS_ERR(inode))
 			return ERR_CAST(inode);
 	}
-	dentry->d_op = AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations;
+	d_set_d_op(dentry, AFFS_SB(sb)->s_flags & SF_INTL ? &affs_intl_dentry_operations : &affs_dentry_operations);
 	d_add(dentry, inode);
 	return NULL;
 }
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0cf7f4384cbd..d39081bbe7ce 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -95,11 +95,18 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
 	return &i->vfs_inode;
 }
 
-static void affs_destroy_inode(struct inode *inode)
+static void affs_i_callback(struct rcu_head *head)
 {
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	INIT_LIST_HEAD(&inode->i_dentry);
 	kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
 }
 
+static void affs_destroy_inode(struct inode *inode)
+{
+	call_rcu(&inode->i_rcu, affs_i_callback);
+}
+
 static void init_once(void *foo)
 {
 	struct affs_inode_info *ei = (struct affs_inode_info *) foo;
@@ -475,7 +482,7 @@ got_root:
 		printk(KERN_ERR "AFFS: Get root inode failed\n");
 		goto out_error;
 	}
-	sb->s_root->d_op = &affs_dentry_operations;
+	d_set_d_op(sb->s_root, &affs_dentry_operations);
 
 	pr_debug("AFFS: s_flags=%lX\n",sb->s_flags);
 	return 0;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 5439e1bc9a86..34a3263d60a4 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/fs.h>
+#include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
@@ -23,7 +24,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 static int afs_dir_open(struct inode *inode, struct file *file);
 static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
 static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd);
-static int afs_d_delete(struct dentry *dentry);
+static int afs_d_delete(const struct dentry *dentry);
 static void afs_d_release(struct dentry *dentry);
 static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
 			      loff_t fpos, u64 ino, unsigned dtype);
@@ -581,7 +582,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 success:
-	dentry->d_op = &afs_fs_dentry_operations;
+	d_set_d_op(dentry, &afs_fs_dentry_operations);
 
 	d_add(dentry, inode);
 	_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
@@ -607,6 +608,9 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	void *dir_version;
 	int ret;
 
+	if (nd->flags & LOOKUP_RCU)
+		return -ECHILD;
+
 	vnode = AFS_FS_I(dentry->d_inode);
 
 	if (dentry->d_inode)
@@ -730,7 +734,7 @@ out_bad:
  * - called from dput() when d_count is going to 0.
  * - return 1 to request dentry be unhashed, 0 otherwise
  */
-static int afs_d_delete(struct dentry *dentry)
+static int afs_d_delete(const struct dentry *dentry)
 {
 	_enter("%s", dentry->d_name.name);
 
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index cca8eef736fc..6d4bc1c8ff60 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *);
 extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
 extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int);
+extern int afs_permission(struct inode *, int, unsigned int);
 
 /*
  * server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index bb4ed144d0e4..f44b9d355377 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
  * - AFS ACLs are attached to directories only, and a file is controlled by its
  *   parent directory's ACL
  */
-int afs_permission(struct inode *inode, int mask)
+int afs_permission(struct inode *inode, int mask, unsigned int flags)
 {
 	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_access_t uninitialized_var(access);
 	struct key *key;
 	int ret;
 
+	if (flags & IPERM_FLAG_RCU)
+		return -ECHILD;
+
 	_enter("{{%x:%u},%lx},%x,",
 	       vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
 
@@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
 	}
 
 	key_put(key);
-	ret = generic_permission(inode, mask, NULL);
+	ret = generic_permission(inode, mask, flags, NULL);
 	_leave(" = %d", ret);
 	return ret;
 
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 27201cffece4..f901a9d7c111 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -498,6 +498,14 @@ static struct inode *afs_alloc_inode(struct super_block *sb)
 	return &vnode->vfs_inode;
 }
 
+static void afs_i_callback(struct rcu_head *head)
+{
+	struct inode *inode = container_of(head, struct inode, i_rcu);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
+	INIT_LIST_HEAD(&inode->i_dentry);
+	kmem_cache_free(afs_inode_cachep, vnode);
+}
+
 /*
  * destroy an AFS inode struct
  */
@@ -511,7 +519,7 @@ static void afs_destroy_inode(struct inode *inode)
 
 	ASSERTCMP(vnode->server, ==, NULL);
 
-	kmem_cache_free(afs_inode_cachep, vnode);
+	call_rcu(&inode->i_rcu, afs_i_callback);
 	atomic_dec(&afs_count_active_inodes);
 }
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 57ce55b2564c..5fd38112a6ca 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -102,7 +102,7 @@ struct file *anon_inode_getfile(const char *name,
 	this.name = name;
 	this.len = strlen(name);
 	this.hash = 0;
-	path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+	path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
 	if (!path.dentry)
 		goto err_module;
 
@@ -113,7 +113,7 @@ struct file *anon_inode_getfile(const char *name,
 	 */
 	ihold(anon_inode_inode);
 
-	path.dentry->d_op = &anon_inodefs_dentry_operations;
+	d_set_d_op(path.dentry, &anon_inodefs_dentry_operations);
 	d_instantiate(path.dentry, anon_inode_inode);
 
 	error = -ENFILE;
@@ -232,7 +232,7 @@ static int __init anon_inode_init(void)
 	return 0;
 
 err_mntput:
-	mntput(anon_inode_mnt);
+	mntput_long(anon_inode_mnt);
 err_unregister_filesystem:
 	unregister_filesystem(&anon_inode_fs_type);
 err_exit:
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 3d283abf67d7..0fffe1c24cec 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -16,6 +16,7 @@
 #include <linux/auto_fs4.h>
 #include <linux/auto_dev-ioctl.h>
 #include <linux/mutex.h>
+#include <linux/spinlock.h>
 #include <linux/list.h>
 
 /* This is the range of ioctl() numbers we claim as ours */
@@ -60,6 +61,8 @@ do { \
 		current->pid, __func__, ##args); \
 } while (0)
 
+extern spinlock_t autofs4_lock;
+
 /* Unified info structure. This is pointed to by both the dentry and
    inode structures. Each file in the filesystem has an instance of this
    structure. It holds a reference to the dentry, so dentries are never
@@ -254,17 +257,15 @@ static inline int simple_positive(struct dentry *dentry)
 	return dentry->d_inode && !d_unhashed(dentry);
 }
 
-static inline int __simple_empty(struct dentry *dentry)
+static inline void __autofs4_add_expiring(struct dentry *dentry)
 {
-	struct dentry *child;
-	int ret = 0;
-
-	list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
-		if (simple_positive(child))
-			goto out;
-	ret = 1;
-out:
-	return ret;
+	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+	struct autofs_info *ino = autofs4_dentry_ino(dentry);
+	if (ino) {
+		if (list_empty(&ino->expiring))
+			list_add(&ino->expiring, &sbi->expiring_list);
+	}
+	return;
 }
 
 static inline void autofs4_add_expiring(struct dentry *dentry)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a796c9417fb1..cc1d01365905 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -91,24 +91,64 @@ done:
 }
 
 /*
- * Calculate next entry in top down tree traversal.
- * From next_mnt in namespace.c - elegant.
+ * Calculate and dget next entry in top down tree traversal.
  */
-static struct dentry *next_dentry(struct dentry *p, struct dentry *root)
+static struct dentry *get_next_positive_dentry(struct dentry *prev,
+						struct dentry *root)
 {
-	struct list_head *next = p->d_subdirs.next;
+	struct list_head *next;
+	struct dentry *p, *ret;
+
+	if (prev == NULL)
+		return dget(prev);
 
+	spin_lock(&autofs4_lock);
+relock:
+	p = prev;
+	spin_lock(&p->d_lock);
+again:
+	next = p->d_subdirs.next;
 	if (next == &p->d_subdirs) {
 		while (1) {
-			if (p == root)
+			struct dentry *parent;
+
+			if (p == root) {
+				spin_unlock(&p->d_lock);
+				spin_unlock(&autofs4_lock);
+				dput(prev);
 				return NULL;
+			}
+
+			parent = p->d_parent;
+			if (!spin_trylock(&parent->d_lock)) {
+				spin_unlock(&p->d_lock);
+				cpu_relax();
+				goto relock;
+			}
+			spin_unlock(&p->d_lock);
 			next = p->d_u.d_child.next;
-			if (next != &p->d_parent->d_subdirs)
+			p = parent;
+			if (next != &parent->d_subdirs)
 				break;
-			p = p->d_parent;
 		}
 	}
-	return list_entry(next, struct dentry, d_u.d_child);
+	ret = list_entry(next, struct dentry, d_u.d_child);
+
+	spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
+	/* Negative dentry - try next */
+	if (!simple_positive(ret)) {
+		spin_unlock(&ret->d_lock);
+		p = ret;
+		goto again;
+	}
+	dget_dlock(ret);
+	spin_unlock(&ret->d_lock);
+	spin_unlock(&p->d_lock);
+	spin_unlock(&autofs4_lock);
+
+	dput(prev);
+
+	return ret;
 }
 
 /*
@@ -158,18 +198,11 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 	if (!simple_positive(top))
 		return 1;
 
-	spin_lock(&dcache_lock);
-	for (p = top; p; p = next_dentry(p, top)) {
-		/* Negative dentry - give up */
-		if (!simple_positive(p))
-			continue;
-
+	p = NULL;
+	while ((p = get_next_positive_dentry(p, top))) {
 		DPRINTK("dentry %p %.*s",
 			p, (int) p->d_name.len, p->d_name.name);
 
-		p = dget(p);
-		spin_unlock(&dcache_lock);
-
 		/*
 		 * Is someone visiting anywhere in the subtree ?
 		 * If there's no mount we need to check the usage
@@ -198,16 +231,13 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
 			else
 				ino_count++;
 
-			if (atomic_read(&p->d_count) > ino_count) {
+			if (p->d_count > ino_count) {
 				top_ino->last_used = jiffies;
 				dput(p);
 				return 1;
 			}
 		}
-		dput(p);
-		spin_lock(&dcache_lock);
 	}
-	spin_unlock(&dcache_lock);
 
 	/* Timeout of a tree mount is ultimately determined by its top dentry */
 	if (!autofs4_can_expire(top, timeout, do_now))
@@ -226,32 +256,21 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
 	DPRINTK("parent %p %.*s",
 		parent, (int)parent->d_name.len, parent->d_name.name);
 
-	spin_lock(&dcache_lock);
-	for (p = parent; p; p = next_dentry(p, parent)) {
-		/* Negative dentry - give up */
-		if (!simple_positive(p))
-			continue;
-
+	p = NULL;
+	while ((p = get_next_positive_dentry(p, parent))) {
 		DPRINTK("dentry %p %.*s",
 			p, (int) p->d_name.len, p->d_name.name);
 
-		p = dget(p);
-		spin_unlock(&dcache_lock);
-
 		if (d_mountpoint(p)) {
 			/* Can we umount this guy */
 			if (autofs4_mount_busy(mnt, p))
-				goto cont;
+				continue;
 
 			/* Can we expire this guy */
 			if (autofs4_can_expire(p, timeout, do_now))
 				return p;
 		}
-cont:
-		dput(p);
-		spin_lock(&dcache_lock);
 	}
-	spin_unlock(&dcache_lock);
 	return NULL;
 }
 
@@ -276,7 +295,9 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 	struct autofs_info *ino = autofs4_dentry_ino(root);
 	if (d_mountpoint(root)) {
 		ino->flags |= AUTOFS_INF_MOUNTPOINT;
-		root->d_mounted--;
+		spin_lock(&root->d_lock);
+		root->d_flags &= ~DCACHE_MOUNTED;
+		spin_unlock(&root->d_lock);
 	}
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
@@ -302,8 +323,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 {
 	unsigned long timeout;
 	struct dentry *root = sb->s_root;
+	struct dentry *dentry;
 	struct dentry *expired = NULL;
-	struct list_head *next;
 	int do_now = how & AUTOFS_EXP_IMMEDIATE;
 	int exp_leaves = how & AUTOFS_EXP_LEAVES;
 	struct autofs_info *ino;
@@ -315,23 +336,8 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	now = jiffies;
 	timeout = sbi->exp_timeout;
 
-	spin_lock(&dcache_lock);
-	next = root->d_subdirs.next;
-
-	/* On exit from the loop expire is set to a dgot dentry
-	 * to expire or it's NULL */
-	while ( next != &root->d_subdirs ) {
-		struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
-
-		/* Negative dentry - give up */
-		if (!simple_positive(dentry)) {
-			next = next->next;
-			continue;
-		}
-
-		dentry = dget(dentry);
-		spin_unlock(&dcache_lock);
-
+	dentry = NULL;
+	while ((dentry = get_next_positive_dentry(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
 
@@ -347,7 +353,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 
 		/* Path walk currently on this dentry? */
 		ino_count = atomic_read(&ino->count) + 2;
-		if (atomic_read(&dentry->d_count) > ino_count)
+		if (dentry->d_count > ino_count)
 			goto next;
 
 		/* Can we umount this guy */
@@ -369,7 +375,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		if (!exp_leaves) {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
-			if (atomic_read(&dentry->d_count) > ino_count)
+			if (dentry->d_count > ino_count)
 				goto next;
 
 			if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
@@ -383,7 +389,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		} else {
 			/* Path walk currently on this dentry? */
 			ino_count = atomic_read(&ino->count) + 1;
-			if (atomic_read(&dentry->d_count) > ino_count)
+			if (dentry->d_count > ino_count)
 				goto next;
 
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
@@ -394,11 +400,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		}
 next:
 		spin_unlock(&sbi->fs_lock);
-		dput(dentry);
-		spin_lock(&dcache_lock);
-		next = next->next;
 	}
-	spin_unlock(&dcache_lock);
 	return NULL;
 
 found:
@@ -408,9 +410,13 @@ found:
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
+	spin_lock(&expired->d_parent->d_lock);
+	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&expired->d_lock);
+	spin_unlock(&expired->d_parent->d_lock);
+	spin_unlock(&autofs4_lock);
 	return expired;
 }
 
@@ -499,7 +505,14 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 
 	spin_lock(&sbi->fs_lock);
 	if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
-		sb->s_root->d_mounted++;
+		spin_lock(&sb->s_root->d_lock);
+		/*
+		 * If we haven't been expired away, then reset
+		 * mounted status.
+		 */
+		if (mnt->mnt_parent != mnt)
+			sb->s_root->d_flags |= DCACHE_MOUNTED;
+		spin_unlock(&sb->s_root->d_lock);
 		ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
 	}
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index ac87e49fa706..a7bdb9dcac84 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -309,7 +309,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 		goto fail_iput;
 	pipe = NULL;
 
-	root->d_op = &autofs4_sb_dentry_operations;
+	d_set_d_op(root, &autofs4_sb_dentry_operations);
 	root->d_fsdata = ino;
 
 	/* Can this call block? */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d5c1401f0031..651e4ef563b1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,6 +23,8 @@
 
 #include "autofs_i.h"
 
+DEFINE_SPINLOCK(autofs4_lock);
+
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -142,12 +144,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * autofs file system so just let the libfs routines handle
 	 * it.
 	 */
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
+	spin_lock(&dentry->d_lock);
 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
-		spin_unlock(&dcache_lock);
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&autofs4_lock);
 		return -ENOENT;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&autofs4_lock);
 
 out:
 	return dcache_dir_open(inode, file);
@@ -252,9 +257,11 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	/* We trigger a mount for almost all flags */
 	lookup_type = autofs4_need_mount(nd->flags);
 	spin_lock(&sbi->fs_lock);
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
+	spin_lock(&dentry->d_lock);
 	if (!(lookup_type || ino->flags & AUTOFS_INF_PENDING)) {
-		spin_unlock(&dcache_lock);
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&autofs4_lock);
 		spin_unlock(&sbi->fs_lock);
 		goto follow;
 	}
@@ -266,7 +273,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 	 */
 	if (ino->flags & AUTOFS_INF_PENDING ||
 	    (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs))) {
-		spin_unlock(&dcache_lock);
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&autofs4_lock);
 		spin_unlock(&sbi->fs_lock);
 
 		status = try_to_fill_dentry(dentry, nd->flags);
@@ -275,7 +283,8 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
 
 		goto follow;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&autofs4_lock);
 	spin_unlock(&sbi->fs_lock);
 follow:
 	/*
@@ -306,12 +315,19 @@ out_error:
  */
 static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 {
-	struct inode *dir = dentry->d_parent->d_inode;
-	struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
-	int oz_mode = autofs4_oz_mode(sbi);
+	struct inode *dir;
+	struct autofs_sb_info *sbi;
+	int oz_mode;
 	int flags = nd ? nd->flags : 0;
 	int status = 1;
 
+	if (flags & LOOKUP_RCU)
+		return -ECHILD;
+
+	dir = dentry->d_parent->d_inode;
+	sbi = autofs4_sbi(dir->i_sb);
+	oz_mode = autofs4_oz_mode(sbi);
+
 	/* Pending dentry */
 	spin_lock(&sbi->fs_lock);
 	if (autofs4_ispending(dentry)) {
@@ -346,12 +362,14 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 		return 0;
 
 	/* Check for a non-mountpoint directory with no contents */
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
+	spin_lock(&dentry->d_lock);
 	if (S_ISDIR(dentry->d_inode->i_mode) &&
 	    !d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		DPRINTK("dentry=%p %.*s, emptydir",
 			 dentry, dentry->d_name.len, dentry->d_name.name);
-		spin_unlock(&dcache_lock);
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&autofs4_lock);
 
 		/* The daemon never causes a mount to trigger */
 		if (oz_mode)
@@ -367,7 +385,8 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
 
 		return status;
 	}
-	spin_unlock(&dcache_lock);
+	spin_unlock(&dentry->d_lock);
+	spin_unlock(&autofs4_lock);
 
 	return 1;
 }
@@ -422,7 +441,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->active_list;
 	list_for_each(p, head) {
@@ -436,7 +455,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 		spin_lock(&active->d_lock);
 
 		/* Already gone? */
-		if (atomic_read(&active->d_count) == 0)
+		if (active->d_count == 0)
 			goto next;
 
 		qstr = &active->d_name;
@@ -452,17 +471,17 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 			goto next;
 
 		if (d_unhashed(active)) {
-			dget(active);
+			dget_dlock(active);
 			spin_unlock(&active->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&autofs4_lock);
 			return active;
 		}
 next:
 		spin_unlock(&active->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -477,7 +496,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->expiring_list;
 	list_for_each(p, head) {
@@ -507,17 +526,17 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 			goto next;
 
 		if (d_unhashed(expiring)) {
-			dget(expiring);
+			dget_dlock(expiring);
 			spin_unlock(&expiring->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&dcache_lock);
+			spin_unlock(&autofs4_lock);
 			return expiring;
 		}
next:
 		spin_unlock(&expiring->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -559,7 +578,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	 * we check for the hashed dentry and return the newly
 	 * hashed dentry.
	 */
-	dentry->d_op = &autofs4_root_dentry_operations;
+	d_set_d_op(dentry, &autofs4_root_dentry_operations);
 
 	/*
 	 * And we need to ensure that the same dentry is used for
@@ -698,9 +717,9 @@ static int autofs4_dir_symlink(struct inode *dir,
 	d_add(dentry, inode);
 
 	if (dir == dir->i_sb->s_root->d_inode)
-		dentry->d_op = &autofs4_root_dentry_operations;
+		d_set_d_op(dentry, &autofs4_root_dentry_operations);
 	else
-		dentry->d_op = &autofs4_dentry_operations;
+		d_set_d_op(dentry, &autofs4_dentry_operations);
 
 	dentry->d_fsdata = ino;
 	ino->dentry = dget(dentry);
@@ -753,12 +772,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	dir->i_mtime = CURRENT_TIME;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
 	autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	return 0;
 }
@@ -775,16 +794,20 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
-	spin_lock(&dcache_lock);
+	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
+	spin_lock(&dentry->d_lock);
 	if (!list_empty(&dentry->d_subdirs)) {
-		spin_unlock(&dcache_lock);
+		spin_unlock(&dentry->d_lock);
+		spin_unlock(&sbi->lookup_lock);
+		spin_unlock(&autofs4_lock);
 		return -ENOTEMPTY;
 	}
-	autofs4_add_expiring(dentry);
-	spin_lock(&dentry->d_lock);
+	__autofs4_add_expiring(dentry);
+	spin_unlock(&sbi->lookup_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&dcache_lock);
+	spin_unlock(&autofs4_lock);
 
 	if (atomic_dec_and_test(&ino->count)) {
790 p_ino = autofs4_dentry_ino(dentry->d_parent); 813 p_ino = autofs4_dentry_ino(dentry->d_parent);
@@ -829,9 +852,9 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
829 d_add(dentry, inode); 852 d_add(dentry, inode);
830 853
831 if (dir == dir->i_sb->s_root->d_inode) 854 if (dir == dir->i_sb->s_root->d_inode)
832 dentry->d_op = &autofs4_root_dentry_operations; 855 d_set_d_op(dentry, &autofs4_root_dentry_operations);
833 else 856 else
834 dentry->d_op = &autofs4_dentry_operations; 857 d_set_d_op(dentry, &autofs4_dentry_operations);
835 858
836 dentry->d_fsdata = ino; 859 dentry->d_fsdata = ino;
837 ino->dentry = dget(dentry); 860 ino->dentry = dget(dentry);
@@ -980,19 +1003,11 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
980 } 1003 }
981} 1004}
982 1005
983static DEFINE_MUTEX(autofs4_ioctl_mutex);
984
985static long autofs4_root_ioctl(struct file *filp, 1006static long autofs4_root_ioctl(struct file *filp,
986 unsigned int cmd, unsigned long arg) 1007 unsigned int cmd, unsigned long arg)
987{ 1008{
988 long ret;
989 struct inode *inode = filp->f_dentry->d_inode; 1009 struct inode *inode = filp->f_dentry->d_inode;
990 1010 return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
991 mutex_lock(&autofs4_ioctl_mutex);
992 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
993 mutex_unlock(&autofs4_ioctl_mutex);
994
995 return ret;
996} 1011}
997 1012
998#ifdef CONFIG_COMPAT 1013#ifdef CONFIG_COMPAT
@@ -1002,13 +1017,11 @@ static long autofs4_root_compat_ioctl(struct file *filp,
1002 struct inode *inode = filp->f_path.dentry->d_inode; 1017 struct inode *inode = filp->f_path.dentry->d_inode;
1003 int ret; 1018 int ret;
1004 1019
1005 mutex_lock(&autofs4_ioctl_mutex);
1006 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) 1020 if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
1007 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); 1021 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
1008 else 1022 else
1009 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, 1023 ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
1010 (unsigned long)compat_ptr(arg)); 1024 (unsigned long)compat_ptr(arg));
1011 mutex_unlock(&autofs4_ioctl_mutex);
1012 1025
1013 return ret; 1026 return ret;
1014} 1027}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 2341375386f8..c5f8459c905e 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -186,16 +186,26 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
186{ 186{
187 struct dentry *root = sbi->sb->s_root; 187 struct dentry *root = sbi->sb->s_root;
188 struct dentry *tmp; 188 struct dentry *tmp;
189 char *buf = *name; 189 char *buf;
190 char *p; 190 char *p;
191 int len = 0; 191 int len;
192 unsigned seq;
192 193
193 spin_lock(&dcache_lock); 194rename_retry:
195 buf = *name;
196 len = 0;
197
198 seq = read_seqbegin(&rename_lock);
199 rcu_read_lock();
200 spin_lock(&autofs4_lock);
194 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent) 201 for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
195 len += tmp->d_name.len + 1; 202 len += tmp->d_name.len + 1;
196 203
197 if (!len || --len > NAME_MAX) { 204 if (!len || --len > NAME_MAX) {
198 spin_unlock(&dcache_lock); 205 spin_unlock(&autofs4_lock);
206 rcu_read_unlock();
207 if (read_seqretry(&rename_lock, seq))
208 goto rename_retry;
199 return 0; 209 return 0;
200 } 210 }
201 211
@@ -208,7 +218,10 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
208 p -= tmp->d_name.len; 218 p -= tmp->d_name.len;
209 strncpy(p, tmp->d_name.name, tmp->d_name.len); 219 strncpy(p, tmp->d_name.name, tmp->d_name.len);
210 } 220 }
211 spin_unlock(&dcache_lock); 221 spin_unlock(&autofs4_lock);
222 rcu_read_unlock();
223 if (read_seqretry(&rename_lock, seq))
224 goto rename_retry;
212 225
213 return len; 226 return len;
214} 227}
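
The hunk above replaces dcache_lock in autofs4_getpath() with the rename_lock seqlock plus rcu_read_lock(): the d_parent walk is retried from scratch whenever read_seqretry() reports a concurrent rename, which is why buf and len are reset at the rename_retry label. A minimal sketch of that read-retry shape, using a hypothetical seqlock and data pair rather than anything from this patch:

    /* Illustrative only: generic seqlock read-retry loop. */
    #include <linux/seqlock.h>

    static DEFINE_SEQLOCK(demo_lock);       /* hypothetical lock */
    static int demo_a, demo_b;              /* values updated together */

    static void demo_update_pair(int a, int b)
    {
            write_seqlock(&demo_lock);
            demo_a = a;
            demo_b = b;
            write_sequnlock(&demo_lock);
    }

    static void demo_read_pair(int *a, int *b)
    {
            unsigned seq;

            do {
                    seq = read_seqbegin(&demo_lock);
                    *a = demo_a;            /* snapshot both values */
                    *b = demo_b;
            } while (read_seqretry(&demo_lock, seq));  /* a writer ran: retry */
    }

autofs4_getpath() follows the same shape, except that a retry restarts the whole two-pass name walk rather than a simple re-read.
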
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f024d8aaddef..9ad2369d9e35 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
229 return -EIO; 229 return -EIO;
230} 230}
231 231
232static int bad_inode_permission(struct inode *inode, int mask) 232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
233{ 233{
234 if (flags & IPERM_FLAG_RCU)
235 return -ECHILD;
236
234 return -EIO; 237 return -EIO;
235} 238}
236 239
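
This is one of several hunks in the series that add an unsigned int flags argument to permission-checking hooks: when the VFS calls the hook during lockless RCU-walk it passes IPERM_FLAG_RCU, and a hook that cannot complete without sleeping or taking references returns -ECHILD so the lookup is retried in ordinary ref-walk mode. A minimal sketch of that convention, with a hypothetical hook name and the real checks elided:

    #include <linux/fs.h>

    /* Illustrative only: bail out of RCU-walk, do real checks in ref-walk. */
    static int demo_permission(struct inode *inode, int mask, unsigned int flags)
    {
            if (flags & IPERM_FLAG_RCU)
                    return -ECHILD;  /* cannot check locklessly here */

            /* ... blocking or reference-taking checks are safe from here ... */
            return 0;
    }

The btrfs_check_acl() hunk further down follows the same rule, additionally letting the RCU case proceed without -ECHILD when a negative ACL is already cached.
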
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index aa4e7c7ae3c6..de93581b79a2 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -284,12 +284,18 @@ befs_alloc_inode(struct super_block *sb)
284 return &bi->vfs_inode; 284 return &bi->vfs_inode;
285} 285}
286 286
287static void 287static void befs_i_callback(struct rcu_head *head)
288befs_destroy_inode(struct inode *inode)
289{ 288{
289 struct inode *inode = container_of(head, struct inode, i_rcu);
290 INIT_LIST_HEAD(&inode->i_dentry);
290 kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); 291 kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
291} 292}
292 293
294static void befs_destroy_inode(struct inode *inode)
295{
296 call_rcu(&inode->i_rcu, befs_i_callback);
297}
298
293static void init_once(void *foo) 299static void init_once(void *foo)
294{ 300{
295 struct befs_inode_info *bi = (struct befs_inode_info *) foo; 301 struct befs_inode_info *bi = (struct befs_inode_info *) foo;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 76db6d7d49bb..a8e37f81d097 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -248,11 +248,18 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
248 return &bi->vfs_inode; 248 return &bi->vfs_inode;
249} 249}
250 250
251static void bfs_destroy_inode(struct inode *inode) 251static void bfs_i_callback(struct rcu_head *head)
252{ 252{
253 struct inode *inode = container_of(head, struct inode, i_rcu);
254 INIT_LIST_HEAD(&inode->i_dentry);
253 kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); 255 kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
254} 256}
255 257
258static void bfs_destroy_inode(struct inode *inode)
259{
260 call_rcu(&inode->i_rcu, bfs_i_callback);
261}
262
256static void init_once(void *foo) 263static void init_once(void *foo)
257{ 264{
258 struct bfs_inode_info *bi = foo; 265 struct bfs_inode_info *bi = foo;
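
The befs and bfs hunks above (and the block_dev.c hunk below) all apply the same conversion: ->destroy_inode() no longer frees the inode directly but queues an i_callback through call_rcu(), so lockless path walkers that may still dereference the inode are guaranteed a full grace period before the memory goes back to the slab; the callback also reinitializes i_dentry. The generic template, with hypothetical "foo" names standing in for the per-filesystem types:

    #include <linux/fs.h>
    #include <linux/slab.h>

    struct foo_inode_info {
            /* ... filesystem-private fields ... */
            struct inode vfs_inode;
    };

    static struct kmem_cache *foo_inode_cachep;     /* hypothetical slab cache */

    static inline struct foo_inode_info *FOO_I(struct inode *inode)
    {
            return container_of(inode, struct foo_inode_info, vfs_inode);
    }

    static void foo_i_callback(struct rcu_head *head)
    {
            struct inode *inode = container_of(head, struct inode, i_rcu);

            INIT_LIST_HEAD(&inode->i_dentry);
            kmem_cache_free(foo_inode_cachep, FOO_I(inode));
    }

    static void foo_destroy_inode(struct inode *inode)
    {
            call_rcu(&inode->i_rcu, foo_i_callback);
    }
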
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2dfb2e7c..4bd454fa844e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
370{ 370{
371 struct bio *bio; 371 struct bio *bio;
372 372
373 if (nr_iovecs > UIO_MAXIOV)
374 return NULL;
375
373 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), 376 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
374 gfp_mask); 377 gfp_mask);
375 if (unlikely(!bio)) 378 if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
697static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, 700static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
698 gfp_t gfp_mask) 701 gfp_t gfp_mask)
699{ 702{
700 struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); 703 struct bio_map_data *bmd;
701 704
705 if (iov_count > UIO_MAXIOV)
706 return NULL;
707
708 bmd = kmalloc(sizeof(*bmd), gfp_mask);
702 if (!bmd) 709 if (!bmd)
703 return NULL; 710 return NULL;
704 711
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
827 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 834 end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
828 start = uaddr >> PAGE_SHIFT; 835 start = uaddr >> PAGE_SHIFT;
829 836
837 /*
838 * Overflow, abort
839 */
840 if (end < start)
841 return ERR_PTR(-EINVAL);
842
830 nr_pages += end - start; 843 nr_pages += end - start;
831 len += iov[i].iov_len; 844 len += iov[i].iov_len;
832 } 845 }
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
955 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 968 unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
956 unsigned long start = uaddr >> PAGE_SHIFT; 969 unsigned long start = uaddr >> PAGE_SHIFT;
957 970
971 /*
972 * Overflow, abort
973 */
974 if (end < start)
975 return ERR_PTR(-EINVAL);
976
958 nr_pages += end - start; 977 nr_pages += end - start;
959 /* 978 /*
960 * buffer must be aligned to at least hardsector size for now 979 * buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
982 unsigned long start = uaddr >> PAGE_SHIFT; 1001 unsigned long start = uaddr >> PAGE_SHIFT;
983 const int local_nr_pages = end - start; 1002 const int local_nr_pages = end - start;
984 const int page_limit = cur_page + local_nr_pages; 1003 const int page_limit = cur_page + local_nr_pages;
985 1004
986 ret = get_user_pages_fast(uaddr, local_nr_pages, 1005 ret = get_user_pages_fast(uaddr, local_nr_pages,
987 write_to_vm, &pages[cur_page]); 1006 write_to_vm, &pages[cur_page]);
988 if (ret < local_nr_pages) { 1007 if (ret < local_nr_pages) {
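
The bio.c hunks add two defensive checks: iovec counts are capped at UIO_MAXIOV before any allocation is sized from them, and each segment's page range is rejected if rounding uaddr + iov_len up to a page boundary wraps around, which would otherwise make end - start a bogus page count. A standalone illustration of the wraparound condition the new end < start tests catch (plain C, not kernel code):

    /* Illustrative helper: page span of a user buffer, failing on wraparound. */
    static int iov_page_span(unsigned long uaddr, unsigned long len,
                             unsigned long page_size, unsigned long *pages)
    {
            unsigned long start = uaddr / page_size;
            unsigned long end = (uaddr + len + page_size - 1) / page_size;

            if (end < start)   /* uaddr + len wrapped past the top of the address space */
                    return -1;

            *pages = end - start;
            return 0;
    }
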
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 06e8ff12b97c..771f23527010 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -11,7 +11,6 @@
11#include <linux/slab.h> 11#include <linux/slab.h>
12#include <linux/kmod.h> 12#include <linux/kmod.h>
13#include <linux/major.h> 13#include <linux/major.h>
14#include <linux/smp_lock.h>
15#include <linux/device_cgroup.h> 14#include <linux/device_cgroup.h>
16#include <linux/highmem.h> 15#include <linux/highmem.h>
17#include <linux/blkdev.h> 16#include <linux/blkdev.h>
@@ -410,13 +409,20 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
410 return &ei->vfs_inode; 409 return &ei->vfs_inode;
411} 410}
412 411
413static void bdev_destroy_inode(struct inode *inode) 412static void bdev_i_callback(struct rcu_head *head)
414{ 413{
414 struct inode *inode = container_of(head, struct inode, i_rcu);
415 struct bdev_inode *bdi = BDEV_I(inode); 415 struct bdev_inode *bdi = BDEV_I(inode);
416 416
417 INIT_LIST_HEAD(&inode->i_dentry);
417 kmem_cache_free(bdev_cachep, bdi); 418 kmem_cache_free(bdev_cachep, bdi);
418} 419}
419 420
421static void bdev_destroy_inode(struct inode *inode)
422{
423 call_rcu(&inode->i_rcu, bdev_i_callback);
424}
425
420static void init_once(void *foo) 426static void init_once(void *foo)
421{ 427{
422 struct bdev_inode *ei = (struct bdev_inode *) foo; 428 struct bdev_inode *ei = (struct bdev_inode *) foo;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 2222d161c7b6..6ae2c8cac9d5 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -185,18 +185,23 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
185 return ret; 185 return ret;
186} 186}
187 187
188int btrfs_check_acl(struct inode *inode, int mask) 188int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
189{ 189{
190 struct posix_acl *acl;
191 int error = -EAGAIN; 190 int error = -EAGAIN;
192 191
193 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); 192 if (flags & IPERM_FLAG_RCU) {
193 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
194 error = -ECHILD;
194 195
195 if (IS_ERR(acl)) 196 } else {
196 return PTR_ERR(acl); 197 struct posix_acl *acl;
197 if (acl) { 198 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
198 error = posix_acl_permission(inode, acl, mask); 199 if (IS_ERR(acl))
199 posix_acl_release(acl); 200 return PTR_ERR(acl);
201 if (acl) {
202 error = posix_acl_permission(inode, acl, mask);
203 posix_acl_release(acl);
204 }
200 } 205 }
201 206
202 return error; 207 return error;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7845d1f7d1d9..b50bc4bd5c56 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
91static struct bio *compressed_bio_alloc(struct block_device *bdev, 91static struct bio *compressed_bio_alloc(struct block_device *bdev,
92 u64 first_byte, gfp_t gfp_flags) 92 u64 first_byte, gfp_t gfp_flags)
93{ 93{
94 struct bio *bio;
95 int nr_vecs; 94 int nr_vecs;
96 95
97 nr_vecs = bio_get_nr_vecs(bdev); 96 nr_vecs = bio_get_nr_vecs(bdev);
98 bio = bio_alloc(gfp_flags, nr_vecs); 97 return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
99
100 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
101 while (!bio && (nr_vecs /= 2))
102 bio = bio_alloc(gfp_flags, nr_vecs);
103 }
104
105 if (bio) {
106 bio->bi_size = 0;
107 bio->bi_bdev = bdev;
108 bio->bi_sector = first_byte >> 9;
109 }
110 return bio;
111} 98}
112 99
113static int check_compressed_csum(struct inode *inode, 100static int check_compressed_csum(struct inode *inode,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8db9234f6b41..a142d204b526 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -808,9 +808,9 @@ struct btrfs_block_group_cache {
808 int extents_thresh; 808 int extents_thresh;
809 int free_extents; 809 int free_extents;
810 int total_bitmaps; 810 int total_bitmaps;
811 int ro:1; 811 unsigned int ro:1;
812 int dirty:1; 812 unsigned int dirty:1;
813 int iref:1; 813 unsigned int iref:1;
814 814
815 int disk_cache_state; 815 int disk_cache_state;
816 816
@@ -2544,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
2544 2544
2545/* acl.c */ 2545/* acl.c */
2546#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2546#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2547int btrfs_check_acl(struct inode *inode, int mask); 2547int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
2548#else 2548#else
2549#define btrfs_check_acl NULL 2549#define btrfs_check_acl NULL
2550#endif 2550#endif
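
The ctree.h hunk turns the ro/dirty/iref bitfields from int into unsigned int. A signed one-bit bitfield can only represent 0 and -1 on the usual ABIs, so storing 1 typically reads back as -1 and a test such as cache->ro == 1 never matches. A standalone demonstration (the signedness of plain int bitfields is implementation-defined, but this is the default gcc behaviour):

    #include <stdio.h>

    struct flags_signed   { int ro:1; };
    struct flags_unsigned { unsigned int ro:1; };

    int main(void)
    {
            struct flags_signed   s = { .ro = 1 };
            struct flags_unsigned u = { .ro = 1 };

            printf("signed ro = %d, unsigned ro = %d\n", s.ro, (int)u.ro);
            if (s.ro == 1)
                    printf("matched\n");    /* usually not reached */
            return 0;
    }
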
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fb827d0d7181..51d2e4de34eb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h> 30#include <linux/slab.h>
31#include <linux/migrate.h>
31#include "compat.h" 32#include "compat.h"
32#include "ctree.h" 33#include "ctree.h"
33#include "disk-io.h" 34#include "disk-io.h"
@@ -355,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
355 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, 356 ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
356 btrfs_header_generation(eb)); 357 btrfs_header_generation(eb));
357 BUG_ON(ret); 358 BUG_ON(ret);
359 WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
360
358 found_start = btrfs_header_bytenr(eb); 361 found_start = btrfs_header_bytenr(eb);
359 if (found_start != start) { 362 if (found_start != start) {
360 WARN_ON(1); 363 WARN_ON(1);
@@ -693,6 +696,27 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
693 __btree_submit_bio_done); 696 __btree_submit_bio_done);
694} 697}
695 698
699#ifdef CONFIG_MIGRATION
700static int btree_migratepage(struct address_space *mapping,
701 struct page *newpage, struct page *page)
702{
703 /*
704 * we can't safely write a btree page from here,
705 * we haven't done the locking hook
706 */
707 if (PageDirty(page))
708 return -EAGAIN;
709 /*
710 * Buffers may be managed in a filesystem specific way.
711 * We must have no buffers or drop them.
712 */
713 if (page_has_private(page) &&
714 !try_to_release_page(page, GFP_KERNEL))
715 return -EAGAIN;
716 return migrate_page(mapping, newpage, page);
717}
718#endif
719
696static int btree_writepage(struct page *page, struct writeback_control *wbc) 720static int btree_writepage(struct page *page, struct writeback_control *wbc)
697{ 721{
698 struct extent_io_tree *tree; 722 struct extent_io_tree *tree;
@@ -707,8 +731,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc)
707 } 731 }
708 732
709 redirty_page_for_writepage(wbc, page); 733 redirty_page_for_writepage(wbc, page);
710 eb = btrfs_find_tree_block(root, page_offset(page), 734 eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
711 PAGE_CACHE_SIZE);
712 WARN_ON(!eb); 735 WARN_ON(!eb);
713 736
714 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); 737 was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
@@ -799,6 +822,9 @@ static const struct address_space_operations btree_aops = {
799 .releasepage = btree_releasepage, 822 .releasepage = btree_releasepage,
800 .invalidatepage = btree_invalidatepage, 823 .invalidatepage = btree_invalidatepage,
801 .sync_page = block_sync_page, 824 .sync_page = block_sync_page,
825#ifdef CONFIG_MIGRATION
826 .migratepage = btree_migratepage,
827#endif
802}; 828};
803 829
804int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, 830int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -981,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
981 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1007 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
982 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1008 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
983 blocksize, generation); 1009 blocksize, generation);
984 BUG_ON(!root->node); 1010 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
1011 free_extent_buffer(root->node);
1012 return -EIO;
1013 }
985 root->commit_root = btrfs_root_node(root); 1014 root->commit_root = btrfs_root_node(root);
986 return 0; 1015 return 0;
987} 1016}
@@ -1538,10 +1567,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1538 GFP_NOFS); 1567 GFP_NOFS);
1539 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), 1568 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1540 GFP_NOFS); 1569 GFP_NOFS);
1541 struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), 1570 struct btrfs_root *tree_root = btrfs_sb(sb);
1542 GFP_NOFS); 1571 struct btrfs_fs_info *fs_info = tree_root->fs_info;
1543 struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
1544 GFP_NOFS);
1545 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1572 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
1546 GFP_NOFS); 1573 GFP_NOFS);
1547 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1574 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 951ef09b82f4..0ccf9a8afcdf 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
110 110
111 dentry = d_obtain_alias(inode); 111 dentry = d_obtain_alias(inode);
112 if (!IS_ERR(dentry)) 112 if (!IS_ERR(dentry))
113 dentry->d_op = &btrfs_dentry_operations; 113 d_set_d_op(dentry, &btrfs_dentry_operations);
114 return dentry; 114 return dentry;
115fail: 115fail:
116 srcu_read_unlock(&fs_info->subvol_srcu, index); 116 srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
166static struct dentry *btrfs_get_parent(struct dentry *child) 166static struct dentry *btrfs_get_parent(struct dentry *child)
167{ 167{
168 struct inode *dir = child->d_inode; 168 struct inode *dir = child->d_inode;
169 static struct dentry *dentry; 169 struct dentry *dentry;
170 struct btrfs_root *root = BTRFS_I(dir)->root; 170 struct btrfs_root *root = BTRFS_I(dir)->root;
171 struct btrfs_path *path; 171 struct btrfs_path *path;
172 struct extent_buffer *leaf; 172 struct extent_buffer *leaf;
@@ -225,16 +225,92 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
225 key.offset = 0; 225 key.offset = 0;
226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); 226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
227 if (!IS_ERR(dentry)) 227 if (!IS_ERR(dentry))
228 dentry->d_op = &btrfs_dentry_operations; 228 d_set_d_op(dentry, &btrfs_dentry_operations);
229 return dentry; 229 return dentry;
230fail: 230fail:
231 btrfs_free_path(path); 231 btrfs_free_path(path);
232 return ERR_PTR(ret); 232 return ERR_PTR(ret);
233} 233}
234 234
235static int btrfs_get_name(struct dentry *parent, char *name,
236 struct dentry *child)
237{
238 struct inode *inode = child->d_inode;
239 struct inode *dir = parent->d_inode;
240 struct btrfs_path *path;
241 struct btrfs_root *root = BTRFS_I(dir)->root;
242 struct btrfs_inode_ref *iref;
243 struct btrfs_root_ref *rref;
244 struct extent_buffer *leaf;
245 unsigned long name_ptr;
246 struct btrfs_key key;
247 int name_len;
248 int ret;
249
250 if (!dir || !inode)
251 return -EINVAL;
252
253 if (!S_ISDIR(dir->i_mode))
254 return -EINVAL;
255
256 path = btrfs_alloc_path();
257 if (!path)
258 return -ENOMEM;
259 path->leave_spinning = 1;
260
261 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
262 key.objectid = BTRFS_I(inode)->root->root_key.objectid;
263 key.type = BTRFS_ROOT_BACKREF_KEY;
264 key.offset = (u64)-1;
265 root = root->fs_info->tree_root;
266 } else {
267 key.objectid = inode->i_ino;
268 key.offset = dir->i_ino;
269 key.type = BTRFS_INODE_REF_KEY;
270 }
271
272 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
273 if (ret < 0) {
274 btrfs_free_path(path);
275 return ret;
276 } else if (ret > 0) {
277 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
278 path->slots[0]--;
279 } else {
280 btrfs_free_path(path);
281 return -ENOENT;
282 }
283 }
284 leaf = path->nodes[0];
285
286 if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
287 rref = btrfs_item_ptr(leaf, path->slots[0],
288 struct btrfs_root_ref);
289 name_ptr = (unsigned long)(rref + 1);
290 name_len = btrfs_root_ref_name_len(leaf, rref);
291 } else {
292 iref = btrfs_item_ptr(leaf, path->slots[0],
293 struct btrfs_inode_ref);
294 name_ptr = (unsigned long)(iref + 1);
295 name_len = btrfs_inode_ref_name_len(leaf, iref);
296 }
297
298 read_extent_buffer(leaf, name, name_ptr, name_len);
299 btrfs_free_path(path);
300
301 /*
302 * have to add the null termination to make sure that reconnect_path
303 * gets the right len for strlen
304 */
305 name[name_len] = '\0';
306
307 return 0;
308}
309
235const struct export_operations btrfs_export_ops = { 310const struct export_operations btrfs_export_ops = {
236 .encode_fh = btrfs_encode_fh, 311 .encode_fh = btrfs_encode_fh,
237 .fh_to_dentry = btrfs_fh_to_dentry, 312 .fh_to_dentry = btrfs_fh_to_dentry,
238 .fh_to_parent = btrfs_fh_to_parent, 313 .fh_to_parent = btrfs_fh_to_parent,
239 .get_parent = btrfs_get_parent, 314 .get_parent = btrfs_get_parent,
315 .get_name = btrfs_get_name,
240}; 316};
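
Besides adding the .get_name export hook, the export.c hunks fix btrfs_get_parent(), whose result pointer was declared static struct dentry *dentry: a function-local static is a single object shared by every invocation, so concurrent export lookups could overwrite each other's result before it was returned. A standalone illustration of why a static local is the wrong place for a per-call result (plain C, not kernel code):

    #include <stdio.h>

    static int *broken_result(int v)
    {
            static int slot;        /* one slot shared by every call */

            slot = v;
            return &slot;           /* every caller gets the same address */
    }

    int main(void)
    {
            int *a = broken_result(1);
            int *b = broken_result(2);

            printf("%d %d\n", *a, *b);   /* prints "2 2": first result overwritten */
            return 0;
    }
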
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0c097f3aec41..227e5815d838 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -429,6 +429,7 @@ err:
429 429
430static int cache_block_group(struct btrfs_block_group_cache *cache, 430static int cache_block_group(struct btrfs_block_group_cache *cache,
431 struct btrfs_trans_handle *trans, 431 struct btrfs_trans_handle *trans,
432 struct btrfs_root *root,
432 int load_cache_only) 433 int load_cache_only)
433{ 434{
434 struct btrfs_fs_info *fs_info = cache->fs_info; 435 struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
442 443
443 /* 444 /*
444 * We can't do the read from on-disk cache during a commit since we need 445 * We can't do the read from on-disk cache during a commit since we need
445 * to have the normal tree locking. 446 * to have the normal tree locking. Also if we are currently trying to
447 * allocate blocks for the tree root we can't do the fast caching since
448 * we likely hold important locks.
446 */ 449 */
447 if (!trans->transaction->in_commit) { 450 if (!trans->transaction->in_commit &&
451 (root && root != root->fs_info->tree_root)) {
448 spin_lock(&cache->lock); 452 spin_lock(&cache->lock);
449 if (cache->cached != BTRFS_CACHE_NO) { 453 if (cache->cached != BTRFS_CACHE_NO) {
450 spin_unlock(&cache->lock); 454 spin_unlock(&cache->lock);
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2741 struct btrfs_root *root = block_group->fs_info->tree_root; 2745 struct btrfs_root *root = block_group->fs_info->tree_root;
2742 struct inode *inode = NULL; 2746 struct inode *inode = NULL;
2743 u64 alloc_hint = 0; 2747 u64 alloc_hint = 0;
2748 int dcs = BTRFS_DC_ERROR;
2744 int num_pages = 0; 2749 int num_pages = 0;
2745 int retries = 0; 2750 int retries = 0;
2746 int ret = 0; 2751 int ret = 0;
@@ -2795,6 +2800,8 @@ again:
2795 2800
2796 spin_lock(&block_group->lock); 2801 spin_lock(&block_group->lock);
2797 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2802 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2803 /* We're not cached, don't bother trying to write stuff out */
2804 dcs = BTRFS_DC_WRITTEN;
2798 spin_unlock(&block_group->lock); 2805 spin_unlock(&block_group->lock);
2799 goto out_put; 2806 goto out_put;
2800 } 2807 }
@@ -2821,6 +2828,8 @@ again:
2821 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2828 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2822 num_pages, num_pages, 2829 num_pages, num_pages,
2823 &alloc_hint); 2830 &alloc_hint);
2831 if (!ret)
2832 dcs = BTRFS_DC_SETUP;
2824 btrfs_free_reserved_data_space(inode, num_pages); 2833 btrfs_free_reserved_data_space(inode, num_pages);
2825out_put: 2834out_put:
2826 iput(inode); 2835 iput(inode);
@@ -2828,10 +2837,7 @@ out_free:
2828 btrfs_release_path(root, path); 2837 btrfs_release_path(root, path);
2829out: 2838out:
2830 spin_lock(&block_group->lock); 2839 spin_lock(&block_group->lock);
2831 if (ret) 2840 block_group->disk_cache_state = dcs;
2832 block_group->disk_cache_state = BTRFS_DC_ERROR;
2833 else
2834 block_group->disk_cache_state = BTRFS_DC_SETUP;
2835 spin_unlock(&block_group->lock); 2841 spin_unlock(&block_group->lock);
2836 2842
2837 return ret; 2843 return ret;
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3037 3043
3038u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3044u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3039{ 3045{
3040 u64 num_devices = root->fs_info->fs_devices->rw_devices; 3046 /*
3047 * we add in the count of missing devices because we want
3048 * to make sure that any RAID levels on a degraded FS
3049 * continue to be honored.
3050 */
3051 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3052 root->fs_info->fs_devices->missing_devices;
3041 3053
3042 if (num_devices == 1) 3054 if (num_devices == 1)
3043 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 3055 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -3412,7 +3424,7 @@ again:
3412 * our reservation. 3424 * our reservation.
3413 */ 3425 */
3414 if (unused <= space_info->total_bytes) { 3426 if (unused <= space_info->total_bytes) {
3415 unused -= space_info->total_bytes; 3427 unused = space_info->total_bytes - unused;
3416 if (unused >= num_bytes) { 3428 if (unused >= num_bytes) {
3417 if (!reserved) 3429 if (!reserved)
3418 space_info->bytes_reserved += orig_bytes; 3430 space_info->bytes_reserved += orig_bytes;
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4080 * space back to the block group, otherwise we will leak space. 4092 * space back to the block group, otherwise we will leak space.
4081 */ 4093 */
4082 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4094 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4083 cache_block_group(cache, trans, 1); 4095 cache_block_group(cache, trans, NULL, 1);
4084 4096
4085 byte_in_group = bytenr - cache->key.objectid; 4097 byte_in_group = bytenr - cache->key.objectid;
4086 WARN_ON(byte_in_group > cache->key.offset); 4098 WARN_ON(byte_in_group > cache->key.offset);
@@ -4930,11 +4942,31 @@ search:
4930 btrfs_get_block_group(block_group); 4942 btrfs_get_block_group(block_group);
4931 search_start = block_group->key.objectid; 4943 search_start = block_group->key.objectid;
4932 4944
4945 /*
4946 * this can happen if we end up cycling through all the
4947 * raid types, but we want to make sure we only allocate
4948 * for the proper type.
4949 */
4950 if (!block_group_bits(block_group, data)) {
4951 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4952 BTRFS_BLOCK_GROUP_RAID1 |
4953 BTRFS_BLOCK_GROUP_RAID10;
4954
4955 /*
4956 * if they asked for extra copies and this block group
4957 * doesn't provide them, bail. This does allow us to
4958 * fill raid0 from raid1.
4959 */
4960 if ((data & extra) && !(block_group->flags & extra))
4961 goto loop;
4962 }
4963
4933have_block_group: 4964have_block_group:
4934 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4965 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4935 u64 free_percent; 4966 u64 free_percent;
4936 4967
4937 ret = cache_block_group(block_group, trans, 1); 4968 ret = cache_block_group(block_group, trans,
4969 orig_root, 1);
4938 if (block_group->cached == BTRFS_CACHE_FINISHED) 4970 if (block_group->cached == BTRFS_CACHE_FINISHED)
4939 goto have_block_group; 4971 goto have_block_group;
4940 4972
@@ -4958,7 +4990,8 @@ have_block_group:
4958 if (loop > LOOP_CACHING_NOWAIT || 4990 if (loop > LOOP_CACHING_NOWAIT ||
4959 (loop > LOOP_FIND_IDEAL && 4991 (loop > LOOP_FIND_IDEAL &&
4960 atomic_read(&space_info->caching_threads) < 2)) { 4992 atomic_read(&space_info->caching_threads) < 2)) {
4961 ret = cache_block_group(block_group, trans, 0); 4993 ret = cache_block_group(block_group, trans,
4994 orig_root, 0);
4962 BUG_ON(ret); 4995 BUG_ON(ret);
4963 } 4996 }
4964 found_uncached_bg = true; 4997 found_uncached_bg = true;
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5515 u64 num_bytes = ins->offset; 5548 u64 num_bytes = ins->offset;
5516 5549
5517 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5550 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5518 cache_block_group(block_group, trans, 0); 5551 cache_block_group(block_group, trans, NULL, 0);
5519 caching_ctl = get_caching_control(block_group); 5552 caching_ctl = get_caching_control(block_group);
5520 5553
5521 if (!caching_ctl) { 5554 if (!caching_ctl) {
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6300 NULL, NULL); 6333 NULL, NULL);
6301 BUG_ON(ret < 0); 6334 BUG_ON(ret < 0);
6302 if (ret > 0) { 6335 if (ret > 0) {
6303 ret = btrfs_del_orphan_item(trans, tree_root, 6336 /* if we fail to delete the orphan item this time
6304 root->root_key.objectid); 6337 * around, it'll get picked up the next time.
6305 BUG_ON(ret); 6338 *
6339 * The most common failure here is just -ENOENT.
6340 */
6341 btrfs_del_orphan_item(trans, tree_root,
6342 root->root_key.objectid);
6306 } 6343 }
6307 } 6344 }
6308 6345
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7878 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7915 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7879 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 7916 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7880 7917
7881 num_devices = root->fs_info->fs_devices->rw_devices; 7918 /*
7919 * we add in the count of missing devices because we want
7920 * to make sure that any RAID levels on a degraded FS
7921 * continue to be honored.
7922 */
7923 num_devices = root->fs_info->fs_devices->rw_devices +
7924 root->fs_info->fs_devices->missing_devices;
7925
7882 if (num_devices == 1) { 7926 if (num_devices == 1) {
7883 stripped |= BTRFS_BLOCK_GROUP_DUP; 7927 stripped |= BTRFS_BLOCK_GROUP_DUP;
7884 stripped = flags & ~stripped; 7928 stripped = flags & ~stripped;
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8247 break; 8291 break;
8248 if (ret != 0) 8292 if (ret != 0)
8249 goto error; 8293 goto error;
8250
8251 leaf = path->nodes[0]; 8294 leaf = path->nodes[0];
8252 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 8295 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8253 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8296 cache = kzalloc(sizeof(*cache), GFP_NOFS);
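
Among the extent-tree.c changes, note the one-line reservation fix: the old code computed unused -= space_info->total_bytes inside a branch that had just established unused <= total_bytes, so the unsigned 64-bit subtraction wrapped to an enormous value and the following unused >= num_bytes test could pass when little or no space was actually available; the new code computes total_bytes - unused instead. A standalone sketch of the wraparound, with illustrative numbers rather than the kernel's real accounting:

    #include <stdint.h>
    #include <stdio.h>
    #include <inttypes.h>

    int main(void)
    {
            uint64_t total = 1000, accounted = 600, num_bytes = 100000;

            uint64_t wrong = accounted - total;     /* wraps to about 2^64 - 400 */
            uint64_t right = total - accounted;     /* 400 bytes genuinely left */

            printf("wrong=%" PRIu64 " right=%" PRIu64 "\n", wrong, right);
            printf("wrong passes check: %d, right passes check: %d\n",
                   wrong >= num_bytes, right >= num_bytes);
            return 0;
    }
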
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eac10e3260a9..3e86b9f36507 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1828,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
1828 bio_put(bio); 1828 bio_put(bio);
1829} 1829}
1830 1830
1831static struct bio * 1831struct bio *
1832extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, 1832btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1833 gfp_t gfp_flags) 1833 gfp_t gfp_flags)
1834{ 1834{
1835 struct bio *bio; 1835 struct bio *bio;
1836 1836
@@ -1919,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
1919 else 1919 else
1920 nr = bio_get_nr_vecs(bdev); 1920 nr = bio_get_nr_vecs(bdev);
1921 1921
1922 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH); 1922 bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1923 1923
1924 bio_add_page(bio, page, page_size, offset); 1924 bio_add_page(bio, page, page_size, offset);
1925 bio->bi_end_io = end_io_func; 1925 bio->bi_end_io = end_io_func;
@@ -2901,21 +2901,53 @@ out:
2901int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2901int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2902 __u64 start, __u64 len, get_extent_t *get_extent) 2902 __u64 start, __u64 len, get_extent_t *get_extent)
2903{ 2903{
2904 int ret; 2904 int ret = 0;
2905 u64 off = start; 2905 u64 off = start;
2906 u64 max = start + len; 2906 u64 max = start + len;
2907 u32 flags = 0; 2907 u32 flags = 0;
2908 u32 found_type;
2909 u64 last;
2908 u64 disko = 0; 2910 u64 disko = 0;
2911 struct btrfs_key found_key;
2909 struct extent_map *em = NULL; 2912 struct extent_map *em = NULL;
2910 struct extent_state *cached_state = NULL; 2913 struct extent_state *cached_state = NULL;
2914 struct btrfs_path *path;
2915 struct btrfs_file_extent_item *item;
2911 int end = 0; 2916 int end = 0;
2912 u64 em_start = 0, em_len = 0; 2917 u64 em_start = 0, em_len = 0;
2913 unsigned long emflags; 2918 unsigned long emflags;
2914 ret = 0; 2919 int hole = 0;
2915 2920
2916 if (len == 0) 2921 if (len == 0)
2917 return -EINVAL; 2922 return -EINVAL;
2918 2923
2924 path = btrfs_alloc_path();
2925 if (!path)
2926 return -ENOMEM;
2927 path->leave_spinning = 1;
2928
2929 ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
2930 path, inode->i_ino, -1, 0);
2931 if (ret < 0) {
2932 btrfs_free_path(path);
2933 return ret;
2934 }
2935 WARN_ON(!ret);
2936 path->slots[0]--;
2937 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2938 struct btrfs_file_extent_item);
2939 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
2940 found_type = btrfs_key_type(&found_key);
2941
2942 /* No extents, just return */
2943 if (found_key.objectid != inode->i_ino ||
2944 found_type != BTRFS_EXTENT_DATA_KEY) {
2945 btrfs_free_path(path);
2946 return 0;
2947 }
2948 last = found_key.offset;
2949 btrfs_free_path(path);
2950
2919 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, 2951 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2920 &cached_state, GFP_NOFS); 2952 &cached_state, GFP_NOFS);
2921 em = get_extent(inode, NULL, 0, off, max - off, 0); 2953 em = get_extent(inode, NULL, 0, off, max - off, 0);
@@ -2925,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2925 ret = PTR_ERR(em); 2957 ret = PTR_ERR(em);
2926 goto out; 2958 goto out;
2927 } 2959 }
2960
2928 while (!end) { 2961 while (!end) {
2962 hole = 0;
2929 off = em->start + em->len; 2963 off = em->start + em->len;
2930 if (off >= max) 2964 if (off >= max)
2931 end = 1; 2965 end = 1;
2932 2966
2967 if (em->block_start == EXTENT_MAP_HOLE) {
2968 hole = 1;
2969 goto next;
2970 }
2971
2933 em_start = em->start; 2972 em_start = em->start;
2934 em_len = em->len; 2973 em_len = em->len;
2935 2974
@@ -2939,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2939 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 2978 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
2940 end = 1; 2979 end = 1;
2941 flags |= FIEMAP_EXTENT_LAST; 2980 flags |= FIEMAP_EXTENT_LAST;
2942 } else if (em->block_start == EXTENT_MAP_HOLE) {
2943 flags |= FIEMAP_EXTENT_UNWRITTEN;
2944 } else if (em->block_start == EXTENT_MAP_INLINE) { 2981 } else if (em->block_start == EXTENT_MAP_INLINE) {
2945 flags |= (FIEMAP_EXTENT_DATA_INLINE | 2982 flags |= (FIEMAP_EXTENT_DATA_INLINE |
2946 FIEMAP_EXTENT_NOT_ALIGNED); 2983 FIEMAP_EXTENT_NOT_ALIGNED);
@@ -2953,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2953 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 2990 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
2954 flags |= FIEMAP_EXTENT_ENCODED; 2991 flags |= FIEMAP_EXTENT_ENCODED;
2955 2992
2993next:
2956 emflags = em->flags; 2994 emflags = em->flags;
2957 free_extent_map(em); 2995 free_extent_map(em);
2958 em = NULL; 2996 em = NULL;
2959
2960 if (!end) { 2997 if (!end) {
2961 em = get_extent(inode, NULL, 0, off, max - off, 0); 2998 em = get_extent(inode, NULL, 0, off, max - off, 0);
2962 if (!em) 2999 if (!em)
@@ -2967,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2967 } 3004 }
2968 emflags = em->flags; 3005 emflags = em->flags;
2969 } 3006 }
3007
2970 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { 3008 if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
2971 flags |= FIEMAP_EXTENT_LAST; 3009 flags |= FIEMAP_EXTENT_LAST;
2972 end = 1; 3010 end = 1;
2973 } 3011 }
2974 3012
2975 ret = fiemap_fill_next_extent(fieinfo, em_start, disko, 3013 if (em_start == last) {
2976 em_len, flags); 3014 flags |= FIEMAP_EXTENT_LAST;
2977 if (ret) 3015 end = 1;
2978 goto out_free; 3016 }
3017
3018 if (!hole) {
3019 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
3020 em_len, flags);
3021 if (ret)
3022 goto out_free;
3023 }
2979 } 3024 }
2980out_free: 3025out_free:
2981 free_extent_map(em); 3026 free_extent_map(em);
@@ -3836,8 +3881,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3836 3881
3837 spin_lock(&tree->buffer_lock); 3882 spin_lock(&tree->buffer_lock);
3838 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT); 3883 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3839 if (!eb) 3884 if (!eb) {
3840 goto out; 3885 spin_unlock(&tree->buffer_lock);
3886 return ret;
3887 }
3841 3888
3842 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3889 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3843 ret = 0; 3890 ret = 0;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 1c6d4f342ef7..4183c8178f01 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
310 struct extent_io_tree *tree, 310 struct extent_io_tree *tree,
311 u64 start, u64 end, struct page *locked_page, 311 u64 start, u64 end, struct page *locked_page,
312 unsigned long op); 312 unsigned long op);
313struct bio *
314btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
315 gfp_t gfp_flags);
313#endif 316#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33df082..66836d85763b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48 struct page **prepared_pages, 48 struct page **prepared_pages,
49 struct iov_iter *i) 49 struct iov_iter *i)
50{ 50{
51 size_t copied; 51 size_t copied = 0;
52 int pg = 0; 52 int pg = 0;
53 int offset = pos & (PAGE_CACHE_SIZE - 1); 53 int offset = pos & (PAGE_CACHE_SIZE - 1);
54 int total_copied = 0;
54 55
55 while (write_bytes > 0) { 56 while (write_bytes > 0) {
56 size_t count = min_t(size_t, 57 size_t count = min_t(size_t,
57 PAGE_CACHE_SIZE - offset, write_bytes); 58 PAGE_CACHE_SIZE - offset, write_bytes);
58 struct page *page = prepared_pages[pg]; 59 struct page *page = prepared_pages[pg];
59again: 60 /*
60 if (unlikely(iov_iter_fault_in_readable(i, count))) 61 * Copy data from userspace to the current page
61 return -EFAULT; 62 *
62 63 * Disable pagefault to avoid recursive lock since
63 /* Copy data from userspace to the current page */ 64 * the pages are already locked
64 copied = iov_iter_copy_from_user(page, i, offset, count); 65 */
66 pagefault_disable();
67 copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
68 pagefault_enable();
65 69
66 /* Flush processor's dcache for this page */ 70 /* Flush processor's dcache for this page */
67 flush_dcache_page(page); 71 flush_dcache_page(page);
68 iov_iter_advance(i, copied); 72 iov_iter_advance(i, copied);
69 write_bytes -= copied; 73 write_bytes -= copied;
74 total_copied += copied;
70 75
76 /* Return to btrfs_file_aio_write to fault page */
71 if (unlikely(copied == 0)) { 77 if (unlikely(copied == 0)) {
72 count = min_t(size_t, PAGE_CACHE_SIZE - offset, 78 break;
73 iov_iter_single_seg_count(i));
74 goto again;
75 } 79 }
76 80
77 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { 81 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
81 offset = 0; 85 offset = 0;
82 } 86 }
83 } 87 }
84 return 0; 88 return total_copied;
85} 89}
86 90
87/* 91/*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
854 unsigned long last_index; 858 unsigned long last_index;
855 int will_write; 859 int will_write;
856 int buffered = 0; 860 int buffered = 0;
861 int copied = 0;
862 int dirty_pages = 0;
857 863
858 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 864 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
859 (file->f_flags & O_DIRECT)); 865 (file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
970 WARN_ON(num_pages > nrptrs); 976 WARN_ON(num_pages > nrptrs);
971 memset(pages, 0, sizeof(struct page *) * nrptrs); 977 memset(pages, 0, sizeof(struct page *) * nrptrs);
972 978
973 ret = btrfs_delalloc_reserve_space(inode, write_bytes); 979 /*
980 * Fault pages before locking them in prepare_pages
981 * to avoid recursive lock
982 */
983 if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
984 ret = -EFAULT;
985 goto out;
986 }
987
988 ret = btrfs_delalloc_reserve_space(inode,
989 num_pages << PAGE_CACHE_SHIFT);
974 if (ret) 990 if (ret)
975 goto out; 991 goto out;
976 992
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
978 pos, first_index, last_index, 994 pos, first_index, last_index,
979 write_bytes); 995 write_bytes);
980 if (ret) { 996 if (ret) {
981 btrfs_delalloc_release_space(inode, write_bytes); 997 btrfs_delalloc_release_space(inode,
998 num_pages << PAGE_CACHE_SHIFT);
982 goto out; 999 goto out;
983 } 1000 }
984 1001
985 ret = btrfs_copy_from_user(pos, num_pages, 1002 copied = btrfs_copy_from_user(pos, num_pages,
986 write_bytes, pages, &i); 1003 write_bytes, pages, &i);
987 if (ret == 0) { 1004 dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
1005 PAGE_CACHE_SHIFT;
1006
1007 if (num_pages > dirty_pages) {
1008 if (copied > 0)
1009 atomic_inc(
1010 &BTRFS_I(inode)->outstanding_extents);
1011 btrfs_delalloc_release_space(inode,
1012 (num_pages - dirty_pages) <<
1013 PAGE_CACHE_SHIFT);
1014 }
1015
1016 if (copied > 0) {
988 dirty_and_release_pages(NULL, root, file, pages, 1017 dirty_and_release_pages(NULL, root, file, pages,
989 num_pages, pos, write_bytes); 1018 dirty_pages, pos, copied);
990 } 1019 }
991 1020
992 btrfs_drop_pages(pages, num_pages); 1021 btrfs_drop_pages(pages, num_pages);
993 if (ret) {
994 btrfs_delalloc_release_space(inode, write_bytes);
995 goto out;
996 }
997 1022
998 if (will_write) { 1023 if (copied > 0) {
999 filemap_fdatawrite_range(inode->i_mapping, pos, 1024 if (will_write) {
1000 pos + write_bytes - 1); 1025 filemap_fdatawrite_range(inode->i_mapping, pos,
1001 } else { 1026 pos + copied - 1);
1002 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1027 } else {
1003 num_pages); 1028 balance_dirty_pages_ratelimited_nr(
1004 if (num_pages < 1029 inode->i_mapping,
1005 (root->leafsize >> PAGE_CACHE_SHIFT) + 1) 1030 dirty_pages);
1006 btrfs_btree_balance_dirty(root, 1); 1031 if (dirty_pages <
1007 btrfs_throttle(root); 1032 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1033 btrfs_btree_balance_dirty(root, 1);
1034 btrfs_throttle(root);
1035 }
1008 } 1036 }
1009 1037
1010 pos += write_bytes; 1038 pos += copied;
1011 num_written += write_bytes; 1039 num_written += copied;
1012 1040
1013 cond_resched(); 1041 cond_resched();
1014 } 1042 }
@@ -1047,8 +1075,14 @@ out:
1047 1075
1048 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { 1076 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
1049 trans = btrfs_start_transaction(root, 0); 1077 trans = btrfs_start_transaction(root, 0);
1078 if (IS_ERR(trans)) {
1079 num_written = PTR_ERR(trans);
1080 goto done;
1081 }
1082 mutex_lock(&inode->i_mutex);
1050 ret = btrfs_log_dentry_safe(trans, root, 1083 ret = btrfs_log_dentry_safe(trans, root,
1051 file->f_dentry); 1084 file->f_dentry);
1085 mutex_unlock(&inode->i_mutex);
1052 if (ret == 0) { 1086 if (ret == 0) {
1053 ret = btrfs_sync_log(trans, root); 1087 ret = btrfs_sync_log(trans, root);
1054 if (ret == 0) 1088 if (ret == 0)
@@ -1067,6 +1101,7 @@ out:
1067 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); 1101 (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
1068 } 1102 }
1069 } 1103 }
1104done:
1070 current->backing_dev_info = NULL; 1105 current->backing_dev_info = NULL;
1071 return num_written ? num_written : err; 1106 return num_written ? num_written : err;
1072} 1107}
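
The btrfs_copy_from_user() rework above moves the fault-in step out of the copy loop: btrfs_file_aio_write() now calls iov_iter_fault_in_readable() before prepare_pages() locks the destination pages, and the copy itself runs with page faults disabled via iov_iter_copy_from_user_atomic(), returning a short count instead of retrying in place. That avoids recursing into the filesystem on a fault while the target pages are already locked. A trimmed sketch of the copy side of that pattern, using only the helpers visible in the hunks (error handling and delalloc accounting omitted):

    #include <linux/fs.h>
    #include <linux/pagemap.h>
    #include <linux/highmem.h>
    #include <linux/uaccess.h>

    /* Illustrative only: copy into a page that the caller has already locked. */
    static size_t copy_into_locked_page(struct page *page, struct iov_iter *i,
                                        unsigned long offset, size_t count)
    {
            size_t copied;

            /*
             * A fault here could recurse into the filesystem and block on the
             * locked page, so copy atomically; the caller is expected to have
             * faulted the source pages in before taking any page locks.
             */
            pagefault_disable();
            copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
            pagefault_enable();

            flush_dcache_page(page);
            iov_iter_advance(i, copied);

            return copied;  /* may be short; the caller drops locks and retries */
    }
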
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 22ee0dc2e6b8..60d684266959 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
290 (unsigned long long)BTRFS_I(inode)->generation, 290 (unsigned long long)BTRFS_I(inode)->generation,
291 (unsigned long long)generation, 291 (unsigned long long)generation,
292 (unsigned long long)block_group->key.objectid); 292 (unsigned long long)block_group->key.objectid);
293 goto out; 293 goto free_cache;
294 } 294 }
295 295
296 if (!num_entries) 296 if (!num_entries)
@@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
524 return 0; 524 return 0;
525 } 525 }
526 526
527 node = rb_first(&block_group->free_space_offset);
528 if (!node) {
529 iput(inode);
530 return 0;
531 }
532
527 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 533 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
528 filemap_write_and_wait(inode->i_mapping); 534 filemap_write_and_wait(inode->i_mapping);
529 btrfs_wait_ordered_range(inode, inode->i_size & 535 btrfs_wait_ordered_range(inode, inode->i_size &
@@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
543 */ 549 */
544 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 550 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
545 551
546 node = rb_first(&block_group->free_space_offset);
547 if (!node)
548 goto out_free;
549
550 /* 552 /*
551 * Lock all pages first so we can lock the extent safely. 553 * Lock all pages first so we can lock the extent safely.
552 * 554 *
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 558cac2dfa54..a0ff46a47895 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -495,7 +495,7 @@ again:
495 add_async_extent(async_cow, start, num_bytes, 495 add_async_extent(async_cow, start, num_bytes,
496 total_compressed, pages, nr_pages_ret); 496 total_compressed, pages, nr_pages_ret);
497 497
498 if (start + num_bytes < end && start + num_bytes < actual_end) { 498 if (start + num_bytes < end) {
499 start += num_bytes; 499 start += num_bytes;
500 pages = NULL; 500 pages = NULL;
501 cond_resched(); 501 cond_resched();
@@ -4084,7 +4084,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4084 int index; 4084 int index;
4085 int ret; 4085 int ret;
4086 4086
4087 dentry->d_op = &btrfs_dentry_operations; 4087 d_set_d_op(dentry, &btrfs_dentry_operations);
4088 4088
4089 if (dentry->d_name.len > BTRFS_NAME_LEN) 4089 if (dentry->d_name.len > BTRFS_NAME_LEN)
4090 return ERR_PTR(-ENAMETOOLONG); 4090 return ERR_PTR(-ENAMETOOLONG);
@@ -4127,7 +4127,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
4127 return inode; 4127 return inode;
4128} 4128}
4129 4129
4130static int btrfs_dentry_delete(struct dentry *dentry) 4130static int btrfs_dentry_delete(const struct dentry *dentry)
4131{ 4131{
4132 struct btrfs_root *root; 4132 struct btrfs_root *root;
4133 4133
@@ -4501,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4501 BTRFS_I(inode)->index_cnt = 2; 4501 BTRFS_I(inode)->index_cnt = 2;
4502 BTRFS_I(inode)->root = root; 4502 BTRFS_I(inode)->root = root;
4503 BTRFS_I(inode)->generation = trans->transid; 4503 BTRFS_I(inode)->generation = trans->transid;
4504 inode->i_generation = BTRFS_I(inode)->generation;
4504 btrfs_set_inode_space_info(root, inode); 4505 btrfs_set_inode_space_info(root, inode);
4505 4506
4506 if (mode & S_IFDIR) 4507 if (mode & S_IFDIR)
@@ -4622,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
4622} 4623}
4623 4624
4624static int btrfs_add_nondir(struct btrfs_trans_handle *trans, 4625static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
4625 struct dentry *dentry, struct inode *inode, 4626 struct inode *dir, struct dentry *dentry,
4626 int backref, u64 index) 4627 struct inode *inode, int backref, u64 index)
4627{ 4628{
4628 int err = btrfs_add_link(trans, dentry->d_parent->d_inode, 4629 int err = btrfs_add_link(trans, dir, inode,
4629 inode, dentry->d_name.name, 4630 dentry->d_name.name, dentry->d_name.len,
4630 dentry->d_name.len, backref, index); 4631 backref, index);
4631 if (!err) { 4632 if (!err) {
4632 d_instantiate(dentry, inode); 4633 d_instantiate(dentry, inode);
4633 return 0; 4634 return 0;
@@ -4668,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4668 btrfs_set_trans_block_group(trans, dir); 4669 btrfs_set_trans_block_group(trans, dir);
4669 4670
4670 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4671 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4671 dentry->d_name.len, 4672 dentry->d_name.len, dir->i_ino, objectid,
4672 dentry->d_parent->d_inode->i_ino, objectid,
4673 BTRFS_I(dir)->block_group, mode, &index); 4673 BTRFS_I(dir)->block_group, mode, &index);
4674 err = PTR_ERR(inode); 4674 err = PTR_ERR(inode);
4675 if (IS_ERR(inode)) 4675 if (IS_ERR(inode))
@@ -4682,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4682 } 4682 }
4683 4683
4684 btrfs_set_trans_block_group(trans, inode); 4684 btrfs_set_trans_block_group(trans, inode);
4685 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 4685 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4686 if (err) 4686 if (err)
4687 drop_inode = 1; 4687 drop_inode = 1;
4688 else { 4688 else {
@@ -4730,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4730 btrfs_set_trans_block_group(trans, dir); 4730 btrfs_set_trans_block_group(trans, dir);
4731 4731
4732 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4732 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4733 dentry->d_name.len, 4733 dentry->d_name.len, dir->i_ino, objectid,
4734 dentry->d_parent->d_inode->i_ino, 4734 BTRFS_I(dir)->block_group, mode, &index);
4735 objectid, BTRFS_I(dir)->block_group, mode,
4736 &index);
4737 err = PTR_ERR(inode); 4735 err = PTR_ERR(inode);
4738 if (IS_ERR(inode)) 4736 if (IS_ERR(inode))
4739 goto out_unlock; 4737 goto out_unlock;
@@ -4745,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4745 } 4743 }
4746 4744
4747 btrfs_set_trans_block_group(trans, inode); 4745 btrfs_set_trans_block_group(trans, inode);
4748 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 4746 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4749 if (err) 4747 if (err)
4750 drop_inode = 1; 4748 drop_inode = 1;
4751 else { 4749 else {
@@ -4787,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4787 return -EPERM; 4785 return -EPERM;
4788 4786
4789 btrfs_inc_nlink(inode); 4787 btrfs_inc_nlink(inode);
4788 inode->i_ctime = CURRENT_TIME;
4790 4789
4791 err = btrfs_set_inode_index(dir, &index); 4790 err = btrfs_set_inode_index(dir, &index);
4792 if (err) 4791 if (err)
@@ -4805,15 +4804,17 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4805 btrfs_set_trans_block_group(trans, dir); 4804 btrfs_set_trans_block_group(trans, dir);
4806 ihold(inode); 4805 ihold(inode);
4807 4806
4808 err = btrfs_add_nondir(trans, dentry, inode, 1, index); 4807 err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
4809 4808
4810 if (err) { 4809 if (err) {
4811 drop_inode = 1; 4810 drop_inode = 1;
4812 } else { 4811 } else {
4812 struct dentry *parent = dget_parent(dentry);
4813 btrfs_update_inode_block_group(trans, dir); 4813 btrfs_update_inode_block_group(trans, dir);
4814 err = btrfs_update_inode(trans, root, inode); 4814 err = btrfs_update_inode(trans, root, inode);
4815 BUG_ON(err); 4815 BUG_ON(err);
4816 btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); 4816 btrfs_log_new_name(trans, inode, NULL, parent);
4817 dput(parent);
4817 } 4818 }
4818 4819
4819 nr = trans->blocks_used; 4820 nr = trans->blocks_used;
@@ -4853,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4853 btrfs_set_trans_block_group(trans, dir); 4854 btrfs_set_trans_block_group(trans, dir);
4854 4855
4855 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4856 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
4856 dentry->d_name.len, 4857 dentry->d_name.len, dir->i_ino, objectid,
4857 dentry->d_parent->d_inode->i_ino, objectid,
4858 BTRFS_I(dir)->block_group, S_IFDIR | mode, 4858 BTRFS_I(dir)->block_group, S_IFDIR | mode,
4859 &index); 4859 &index);
4860 if (IS_ERR(inode)) { 4860 if (IS_ERR(inode)) {
@@ -4877,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4877 if (err) 4877 if (err)
4878 goto out_fail; 4878 goto out_fail;
4879 4879
4880 err = btrfs_add_link(trans, dentry->d_parent->d_inode, 4880 err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
4881 inode, dentry->d_name.name, 4881 dentry->d_name.len, 0, index);
4882 dentry->d_name.len, 0, index);
4883 if (err) 4882 if (err)
4884 goto out_fail; 4883 goto out_fail;
4885 4884
@@ -5535,13 +5534,21 @@ struct btrfs_dio_private {
5535 u64 bytes; 5534 u64 bytes;
5536 u32 *csums; 5535 u32 *csums;
5537 void *private; 5536 void *private;
5537
5538 /* number of bios pending for this dio */
5539 atomic_t pending_bios;
5540
5541 /* IO errors */
5542 int errors;
5543
5544 struct bio *orig_bio;
5538}; 5545};
5539 5546
5540static void btrfs_endio_direct_read(struct bio *bio, int err) 5547static void btrfs_endio_direct_read(struct bio *bio, int err)
5541{ 5548{
5549 struct btrfs_dio_private *dip = bio->bi_private;
5542 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; 5550 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
5543 struct bio_vec *bvec = bio->bi_io_vec; 5551 struct bio_vec *bvec = bio->bi_io_vec;
5544 struct btrfs_dio_private *dip = bio->bi_private;
5545 struct inode *inode = dip->inode; 5552 struct inode *inode = dip->inode;
5546 struct btrfs_root *root = BTRFS_I(inode)->root; 5553 struct btrfs_root *root = BTRFS_I(inode)->root;
5547 u64 start; 5554 u64 start;
@@ -5595,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
5595 struct btrfs_trans_handle *trans; 5602 struct btrfs_trans_handle *trans;
5596 struct btrfs_ordered_extent *ordered = NULL; 5603 struct btrfs_ordered_extent *ordered = NULL;
5597 struct extent_state *cached_state = NULL; 5604 struct extent_state *cached_state = NULL;
5605 u64 ordered_offset = dip->logical_offset;
5606 u64 ordered_bytes = dip->bytes;
5598 int ret; 5607 int ret;
5599 5608
5600 if (err) 5609 if (err)
5601 goto out_done; 5610 goto out_done;
5602 5611again:
5603 ret = btrfs_dec_test_ordered_pending(inode, &ordered, 5612 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
5604 dip->logical_offset, dip->bytes); 5613 &ordered_offset,
5614 ordered_bytes);
5605 if (!ret) 5615 if (!ret)
5606 goto out_done; 5616 goto out_test;
5607 5617
5608 BUG_ON(!ordered); 5618 BUG_ON(!ordered);
5609 5619
@@ -5663,8 +5673,20 @@ out_unlock:
5663out: 5673out:
5664 btrfs_delalloc_release_metadata(inode, ordered->len); 5674 btrfs_delalloc_release_metadata(inode, ordered->len);
5665 btrfs_end_transaction(trans, root); 5675 btrfs_end_transaction(trans, root);
5676 ordered_offset = ordered->file_offset + ordered->len;
5666 btrfs_put_ordered_extent(ordered); 5677 btrfs_put_ordered_extent(ordered);
5667 btrfs_put_ordered_extent(ordered); 5678 btrfs_put_ordered_extent(ordered);
5679
5680out_test:
5681 /*
5682 * our bio might span multiple ordered extents. If we haven't
5683 * completed the accounting for the whole dio, go back and try again
5684 */
5685 if (ordered_offset < dip->logical_offset + dip->bytes) {
5686 ordered_bytes = dip->logical_offset + dip->bytes -
5687 ordered_offset;
5688 goto again;
5689 }
5668out_done: 5690out_done:
5669 bio->bi_private = dip->private; 5691 bio->bi_private = dip->private;
5670 5692
@@ -5684,6 +5706,176 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
5684 return 0; 5706 return 0;
5685} 5707}
5686 5708
5709static void btrfs_end_dio_bio(struct bio *bio, int err)
5710{
5711 struct btrfs_dio_private *dip = bio->bi_private;
5712
5713 if (err) {
5714 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
5715 "sector %#Lx len %u err no %d\n",
5716 dip->inode->i_ino, bio->bi_rw,
5717 (unsigned long long)bio->bi_sector, bio->bi_size, err);
5718 dip->errors = 1;
5719
5720 /*
5721 * before the atomic variable goes to zero, we must make sure
5722 * dip->errors is perceived to be set.
5723 */
5724 smp_mb__before_atomic_dec();
5725 }
5726
5727 /* if there are more bios still pending for this dio, just exit */
5728 if (!atomic_dec_and_test(&dip->pending_bios))
5729 goto out;
5730
5731 if (dip->errors)
5732 bio_io_error(dip->orig_bio);
5733 else {
5734 set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
5735 bio_endio(dip->orig_bio, 0);
5736 }
5737out:
5738 bio_put(bio);
5739}
5740
5741static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
5742 u64 first_sector, gfp_t gfp_flags)
5743{
5744 int nr_vecs = bio_get_nr_vecs(bdev);
5745 return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
5746}
5747
5748static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
5749 int rw, u64 file_offset, int skip_sum,
5750 u32 *csums)
5751{
5752 int write = rw & REQ_WRITE;
5753 struct btrfs_root *root = BTRFS_I(inode)->root;
5754 int ret;
5755
5756 bio_get(bio);
5757 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
5758 if (ret)
5759 goto err;
5760
5761 if (write && !skip_sum) {
5762 ret = btrfs_wq_submit_bio(root->fs_info,
5763 inode, rw, bio, 0, 0,
5764 file_offset,
5765 __btrfs_submit_bio_start_direct_io,
5766 __btrfs_submit_bio_done);
5767 goto err;
5768 } else if (!skip_sum)
5769 btrfs_lookup_bio_sums_dio(root, inode, bio,
5770 file_offset, csums);
5771
5772 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5773err:
5774 bio_put(bio);
5775 return ret;
5776}
5777
5778static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
5779 int skip_sum)
5780{
5781 struct inode *inode = dip->inode;
5782 struct btrfs_root *root = BTRFS_I(inode)->root;
5783 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
5784 struct bio *bio;
5785 struct bio *orig_bio = dip->orig_bio;
5786 struct bio_vec *bvec = orig_bio->bi_io_vec;
5787 u64 start_sector = orig_bio->bi_sector;
5788 u64 file_offset = dip->logical_offset;
5789 u64 submit_len = 0;
5790 u64 map_length;
5791 int nr_pages = 0;
5792 u32 *csums = dip->csums;
5793 int ret = 0;
5794
5795 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
5796 if (!bio)
5797 return -ENOMEM;
5798 bio->bi_private = dip;
5799 bio->bi_end_io = btrfs_end_dio_bio;
5800 atomic_inc(&dip->pending_bios);
5801
5802 map_length = orig_bio->bi_size;
5803 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5804 &map_length, NULL, 0);
5805 if (ret) {
5806 bio_put(bio);
5807 return -EIO;
5808 }
5809
5810 while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
5811 if (unlikely(map_length < submit_len + bvec->bv_len ||
5812 bio_add_page(bio, bvec->bv_page, bvec->bv_len,
5813 bvec->bv_offset) < bvec->bv_len)) {
5814 /*
5815 * inc the count before we submit the bio so the
5816 * end IO handler can't bring it to zero and free
5817 * the dip while we're still splitting up and
5818 * submitting the remaining bios
5819 */
5820 atomic_inc(&dip->pending_bios);
5821 ret = __btrfs_submit_dio_bio(bio, inode, rw,
5822 file_offset, skip_sum,
5823 csums);
5824 if (ret) {
5825 bio_put(bio);
5826 atomic_dec(&dip->pending_bios);
5827 goto out_err;
5828 }
5829
5830 if (!skip_sum)
5831 csums = csums + nr_pages;
5832 start_sector += submit_len >> 9;
5833 file_offset += submit_len;
5834
5835 submit_len = 0;
5836 nr_pages = 0;
5837
5838 bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
5839 start_sector, GFP_NOFS);
5840 if (!bio)
5841 goto out_err;
5842 bio->bi_private = dip;
5843 bio->bi_end_io = btrfs_end_dio_bio;
5844
5845 map_length = orig_bio->bi_size;
5846 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
5847 &map_length, NULL, 0);
5848 if (ret) {
5849 bio_put(bio);
5850 goto out_err;
5851 }
5852 } else {
5853 submit_len += bvec->bv_len;
5854 nr_pages ++;
5855 bvec++;
5856 }
5857 }
5858
5859 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
5860 csums);
5861 if (!ret)
5862 return 0;
5863
5864 bio_put(bio);
5865out_err:
5866 dip->errors = 1;
5867 /*
5868 * before the atomic variable goes to zero, we must
5869 * make sure dip->errors is perceived to be set.
5870 */
5871 smp_mb__before_atomic_dec();
5872 if (atomic_dec_and_test(&dip->pending_bios))
5873 bio_io_error(dip->orig_bio);
5874
5875 /* bio_end_io() will handle error, so we needn't return it */
5876 return 0;
5877}
5878
5687static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, 5879static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5688 loff_t file_offset) 5880 loff_t file_offset)
5689{ 5881{
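
The split/submit helper added above keeps the btrfs_dio_private alive through pending_bios: the count already covers a bio before that bio is submitted, so the end_io callback of an already-completed piece can never drop it to zero while the loop is still building the next bio, and only the final decrement completes (or fails) dip->orig_bio. The same guarantee is often written with an explicit bias reference; the stand-alone C11 sketch below shows that shape (the names and the bias arrangement are illustrative, not the patch's exact bookkeeping):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct work {
        atomic_int pending;   /* plays the role of dip->pending_bios */
        bool error;           /* plays the role of dip->errors */
    };

    static void finish_whole_job(struct work *w) { (void)w; }                /* stand-in finalizer */
    static void submit_part(struct work *w, int i) { (void)w; (void)i; }     /* stand-in async submit */

    /* Runs once per finished piece; the final drop completes the whole job. */
    static void complete_one(struct work *w, bool failed)
    {
        if (failed)
            w->error = true;
        if (atomic_fetch_sub(&w->pending, 1) == 1)
            finish_whole_job(w);
    }

    static void submit_all(struct work *w, int nr_parts)
    {
        atomic_store(&w->pending, 1);            /* bias: hold the job open while splitting */
        for (int i = 0; i < nr_parts; i++) {
            atomic_fetch_add(&w->pending, 1);
            submit_part(w, i);                   /* each part later calls complete_one() */
        }
        complete_one(w, false);                  /* drop the bias; may complete right here */
    }
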
@@ -5723,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5723 5915
5724 dip->disk_bytenr = (u64)bio->bi_sector << 9; 5916 dip->disk_bytenr = (u64)bio->bi_sector << 9;
5725 bio->bi_private = dip; 5917 bio->bi_private = dip;
5918 dip->errors = 0;
5919 dip->orig_bio = bio;
5920 atomic_set(&dip->pending_bios, 0);
5726 5921
5727 if (write) 5922 if (write)
5728 bio->bi_end_io = btrfs_endio_direct_write; 5923 bio->bi_end_io = btrfs_endio_direct_write;
5729 else 5924 else
5730 bio->bi_end_io = btrfs_endio_direct_read; 5925 bio->bi_end_io = btrfs_endio_direct_read;
5731 5926
5732 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 5927 ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
5733 if (ret) 5928 if (!ret)
5734 goto out_err;
5735
5736 if (write && !skip_sum) {
5737 ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
5738 inode, rw, bio, 0, 0,
5739 dip->logical_offset,
5740 __btrfs_submit_bio_start_direct_io,
5741 __btrfs_submit_bio_done);
5742 if (ret)
5743 goto out_err;
5744 return; 5929 return;
5745 } else if (!skip_sum)
5746 btrfs_lookup_bio_sums_dio(root, inode, bio,
5747 dip->logical_offset, dip->csums);
5748
5749 ret = btrfs_map_bio(root, rw, bio, 0, 1);
5750 if (ret)
5751 goto out_err;
5752 return;
5753out_err:
5754 kfree(dip->csums);
5755 kfree(dip);
5756free_ordered: 5930free_ordered:
5757 /* 5931 /*
5758 * If this is a write, we need to clean up the reserved space and kill 5932 * If this is a write, we need to clean up the reserved space and kill
@@ -5760,8 +5934,7 @@ free_ordered:
5760 */ 5934 */
5761 if (write) { 5935 if (write) {
5762 struct btrfs_ordered_extent *ordered; 5936 struct btrfs_ordered_extent *ordered;
5763 ordered = btrfs_lookup_ordered_extent(inode, 5937 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
5764 dip->logical_offset);
5765 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && 5938 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
5766 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) 5939 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
5767 btrfs_free_reserved_extent(root, ordered->start, 5940 btrfs_free_reserved_extent(root, ordered->start,
@@ -6322,6 +6495,13 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6322 return inode; 6495 return inode;
6323} 6496}
6324 6497
6498static void btrfs_i_callback(struct rcu_head *head)
6499{
6500 struct inode *inode = container_of(head, struct inode, i_rcu);
6501 INIT_LIST_HEAD(&inode->i_dentry);
6502 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
6503}
6504
6325void btrfs_destroy_inode(struct inode *inode) 6505void btrfs_destroy_inode(struct inode *inode)
6326{ 6506{
6327 struct btrfs_ordered_extent *ordered; 6507 struct btrfs_ordered_extent *ordered;
@@ -6391,7 +6571,7 @@ void btrfs_destroy_inode(struct inode *inode)
6391 inode_tree_del(inode); 6571 inode_tree_del(inode);
6392 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); 6572 btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
6393free: 6573free:
6394 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6574 call_rcu(&inode->i_rcu, btrfs_i_callback);
6395} 6575}
6396 6576
6397int btrfs_drop_inode(struct inode *inode) 6577int btrfs_drop_inode(struct inode *inode)
@@ -6607,8 +6787,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
6607 BUG_ON(ret); 6787 BUG_ON(ret);
6608 6788
6609 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 6789 if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
6610 btrfs_log_new_name(trans, old_inode, old_dir, 6790 struct dentry *parent = dget_parent(new_dentry);
6611 new_dentry->d_parent); 6791 btrfs_log_new_name(trans, old_inode, old_dir, parent);
6792 dput(parent);
6612 btrfs_end_log_trans(root); 6793 btrfs_end_log_trans(root);
6613 } 6794 }
6614out_fail: 6795out_fail:
@@ -6758,8 +6939,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
6758 btrfs_set_trans_block_group(trans, dir); 6939 btrfs_set_trans_block_group(trans, dir);
6759 6940
6760 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 6941 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
6761 dentry->d_name.len, 6942 dentry->d_name.len, dir->i_ino, objectid,
6762 dentry->d_parent->d_inode->i_ino, objectid,
6763 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO, 6943 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
6764 &index); 6944 &index);
6765 err = PTR_ERR(inode); 6945 err = PTR_ERR(inode);
@@ -6773,7 +6953,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
6773 } 6953 }
6774 6954
6775 btrfs_set_trans_block_group(trans, inode); 6955 btrfs_set_trans_block_group(trans, inode);
6776 err = btrfs_add_nondir(trans, dentry, inode, 0, index); 6956 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
6777 if (err) 6957 if (err)
6778 drop_inode = 1; 6958 drop_inode = 1;
6779 else { 6959 else {
@@ -6844,6 +7024,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6844 struct btrfs_root *root = BTRFS_I(inode)->root; 7024 struct btrfs_root *root = BTRFS_I(inode)->root;
6845 struct btrfs_key ins; 7025 struct btrfs_key ins;
6846 u64 cur_offset = start; 7026 u64 cur_offset = start;
7027 u64 i_size;
6847 int ret = 0; 7028 int ret = 0;
6848 bool own_trans = true; 7029 bool own_trans = true;
6849 7030
@@ -6885,11 +7066,11 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6885 (actual_len > inode->i_size) && 7066 (actual_len > inode->i_size) &&
6886 (cur_offset > inode->i_size)) { 7067 (cur_offset > inode->i_size)) {
6887 if (cur_offset > actual_len) 7068 if (cur_offset > actual_len)
6888 i_size_write(inode, actual_len); 7069 i_size = actual_len;
6889 else 7070 else
6890 i_size_write(inode, cur_offset); 7071 i_size = cur_offset;
6891 i_size_write(inode, cur_offset); 7072 i_size_write(inode, i_size);
6892 btrfs_ordered_update_i_size(inode, cur_offset, NULL); 7073 btrfs_ordered_update_i_size(inode, i_size, NULL);
6893 } 7074 }
6894 7075
6895 ret = btrfs_update_inode(trans, root, inode); 7076 ret = btrfs_update_inode(trans, root, inode);
@@ -6943,6 +7124,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
6943 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start); 7124 btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
6944 7125
6945 mutex_lock(&inode->i_mutex); 7126 mutex_lock(&inode->i_mutex);
7127 ret = inode_newsize_ok(inode, alloc_end);
7128 if (ret)
7129 goto out;
7130
6946 if (alloc_start > inode->i_size) { 7131 if (alloc_start > inode->i_size) {
6947 ret = btrfs_cont_expand(inode, alloc_start); 7132 ret = btrfs_cont_expand(inode, alloc_start);
6948 if (ret) 7133 if (ret)
@@ -7026,11 +7211,11 @@ static int btrfs_set_page_dirty(struct page *page)
7026 return __set_page_dirty_nobuffers(page); 7211 return __set_page_dirty_nobuffers(page);
7027} 7212}
7028 7213
7029static int btrfs_permission(struct inode *inode, int mask) 7214static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
7030{ 7215{
7031 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7216 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
7032 return -EACCES; 7217 return -EACCES;
7033 return generic_permission(inode, mask, btrfs_check_acl); 7218 return generic_permission(inode, mask, flags, btrfs_check_acl);
7034} 7219}
7035 7220
7036static const struct inode_operations btrfs_dir_inode_operations = { 7221static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7139,6 +7324,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7139 .readlink = generic_readlink, 7324 .readlink = generic_readlink,
7140 .follow_link = page_follow_link_light, 7325 .follow_link = page_follow_link_light,
7141 .put_link = page_put_link, 7326 .put_link = page_put_link,
7327 .getattr = btrfs_getattr,
7142 .permission = btrfs_permission, 7328 .permission = btrfs_permission,
7143 .setxattr = btrfs_setxattr, 7329 .setxattr = btrfs_setxattr,
7144 .getxattr = btrfs_getxattr, 7330 .getxattr = btrfs_getxattr,
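
The inode.c hunks above (and the ioctl.c, transaction.c and tree-log.c hunks below) all make the same change: a bare dentry->d_parent->d_inode dereference becomes a dget_parent()/dput() pair, so the parent stays pinned even if a concurrent rename moves the dentry. A minimal sketch of the pattern; btrfs_do_something() is a placeholder, not a function from the patch:

    static int example_use_parent(struct dentry *dentry)
    {
        struct dentry *parent = dget_parent(dentry);    /* takes a reference on the parent */
        struct inode *dir = parent->d_inode;
        int ret;

        ret = btrfs_do_something(dir, dentry);

        dput(parent);                                   /* drop the reference when done */
        return ret;
    }
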
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 463d91b4dd3a..f87552a1d7ea 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -233,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root,
233 struct btrfs_inode_item *inode_item; 233 struct btrfs_inode_item *inode_item;
234 struct extent_buffer *leaf; 234 struct extent_buffer *leaf;
235 struct btrfs_root *new_root; 235 struct btrfs_root *new_root;
236 struct inode *dir = dentry->d_parent->d_inode; 236 struct dentry *parent = dget_parent(dentry);
237 struct inode *dir;
237 int ret; 238 int ret;
238 int err; 239 int err;
239 u64 objectid; 240 u64 objectid;
@@ -242,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root,
242 243
243 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, 244 ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
244 0, &objectid); 245 0, &objectid);
245 if (ret) 246 if (ret) {
247 dput(parent);
246 return ret; 248 return ret;
249 }
250
251 dir = parent->d_inode;
252
247 /* 253 /*
248 * 1 - inode item 254 * 1 - inode item
249 * 2 - refs 255 * 2 - refs
@@ -251,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root,
251 * 2 - dir items 257 * 2 - dir items
252 */ 258 */
253 trans = btrfs_start_transaction(root, 6); 259 trans = btrfs_start_transaction(root, 6);
254 if (IS_ERR(trans)) 260 if (IS_ERR(trans)) {
261 dput(parent);
255 return PTR_ERR(trans); 262 return PTR_ERR(trans);
263 }
256 264
257 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 265 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
258 0, objectid, NULL, 0, 0, 0); 266 0, objectid, NULL, 0, 0, 0);
@@ -339,6 +347,7 @@ static noinline int create_subvol(struct btrfs_root *root,
339 347
340 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 348 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
341fail: 349fail:
350 dput(parent);
342 if (async_transid) { 351 if (async_transid) {
343 *async_transid = trans->transid; 352 *async_transid = trans->transid;
344 err = btrfs_commit_transaction_async(trans, root, 1); 353 err = btrfs_commit_transaction_async(trans, root, 1);
@@ -354,6 +363,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
354 char *name, int namelen, u64 *async_transid) 363 char *name, int namelen, u64 *async_transid)
355{ 364{
356 struct inode *inode; 365 struct inode *inode;
366 struct dentry *parent;
357 struct btrfs_pending_snapshot *pending_snapshot; 367 struct btrfs_pending_snapshot *pending_snapshot;
358 struct btrfs_trans_handle *trans; 368 struct btrfs_trans_handle *trans;
359 int ret; 369 int ret;
@@ -396,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
396 406
397 btrfs_orphan_cleanup(pending_snapshot->snap); 407 btrfs_orphan_cleanup(pending_snapshot->snap);
398 408
399 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 409 parent = dget_parent(dentry);
410 inode = btrfs_lookup_dentry(parent->d_inode, dentry);
411 dput(parent);
400 if (IS_ERR(inode)) { 412 if (IS_ERR(inode)) {
401 ret = PTR_ERR(inode); 413 ret = PTR_ERR(inode);
402 goto fail; 414 goto fail;
@@ -935,23 +947,42 @@ out:
935 947
936static noinline int btrfs_ioctl_snap_create(struct file *file, 948static noinline int btrfs_ioctl_snap_create(struct file *file,
937 void __user *arg, int subvol, 949 void __user *arg, int subvol,
938 int async) 950 int v2)
939{ 951{
940 struct btrfs_ioctl_vol_args *vol_args = NULL; 952 struct btrfs_ioctl_vol_args *vol_args = NULL;
941 struct btrfs_ioctl_async_vol_args *async_vol_args = NULL; 953 struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
942 char *name; 954 char *name;
943 u64 fd; 955 u64 fd;
944 u64 transid = 0;
945 int ret; 956 int ret;
946 957
947 if (async) { 958 if (v2) {
948 async_vol_args = memdup_user(arg, sizeof(*async_vol_args)); 959 u64 transid = 0;
949 if (IS_ERR(async_vol_args)) 960 u64 *ptr = NULL;
950 return PTR_ERR(async_vol_args); 961
962 vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
963 if (IS_ERR(vol_args_v2))
964 return PTR_ERR(vol_args_v2);
965
966 if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
967 ret = -EINVAL;
968 goto out;
969 }
970
971 name = vol_args_v2->name;
972 fd = vol_args_v2->fd;
973 vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
974
975 if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
976 ptr = &transid;
977
978 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
979 subvol, ptr);
951 980
952 name = async_vol_args->name; 981 if (ret == 0 && ptr &&
953 fd = async_vol_args->fd; 982 copy_to_user(arg +
954 async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0'; 983 offsetof(struct btrfs_ioctl_vol_args_v2,
984 transid), ptr, sizeof(*ptr)))
985 ret = -EFAULT;
955 } else { 986 } else {
956 vol_args = memdup_user(arg, sizeof(*vol_args)); 987 vol_args = memdup_user(arg, sizeof(*vol_args));
957 if (IS_ERR(vol_args)) 988 if (IS_ERR(vol_args))
@@ -959,20 +990,13 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
959 name = vol_args->name; 990 name = vol_args->name;
960 fd = vol_args->fd; 991 fd = vol_args->fd;
961 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 992 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
962 }
963
964 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
965 subvol, &transid);
966 993
967 if (!ret && async) { 994 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
968 if (copy_to_user(arg + 995 subvol, NULL);
969 offsetof(struct btrfs_ioctl_async_vol_args,
970 transid), &transid, sizeof(transid)))
971 return -EFAULT;
972 } 996 }
973 997out:
974 kfree(vol_args); 998 kfree(vol_args);
975 kfree(async_vol_args); 999 kfree(vol_args_v2);
976 1000
977 return ret; 1001 return ret;
978} 1002}
@@ -1669,12 +1693,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1669 olen = len = src->i_size - off; 1693 olen = len = src->i_size - off;
1670 /* if we extend to eof, continue to block boundary */ 1694 /* if we extend to eof, continue to block boundary */
1671 if (off + len == src->i_size) 1695 if (off + len == src->i_size)
1672 len = ((src->i_size + bs-1) & ~(bs-1)) 1696 len = ALIGN(src->i_size, bs) - off;
1673 - off;
1674 1697
1675 /* verify the end result is block aligned */ 1698 /* verify the end result is block aligned */
1676 if ((off & (bs-1)) || 1699 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
1677 ((off + len) & (bs-1))) 1700 !IS_ALIGNED(destoff, bs))
1678 goto out_unlock; 1701 goto out_unlock;
1679 1702
1680 /* do any pending delalloc/csum calc on src, one way or 1703 /* do any pending delalloc/csum calc on src, one way or
@@ -1874,8 +1897,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1874 * but shouldn't round up the file size 1897 * but shouldn't round up the file size
1875 */ 1898 */
1876 endoff = new_key.offset + datal; 1899 endoff = new_key.offset + datal;
1877 if (endoff > off+olen) 1900 if (endoff > destoff+olen)
1878 endoff = off+olen; 1901 endoff = destoff+olen;
1879 if (endoff > inode->i_size) 1902 if (endoff > inode->i_size)
1880 btrfs_i_size_write(inode, endoff); 1903 btrfs_i_size_write(inode, endoff);
1881 1904
@@ -2235,7 +2258,7 @@ long btrfs_ioctl(struct file *file, unsigned int
2235 return btrfs_ioctl_getversion(file, argp); 2258 return btrfs_ioctl_getversion(file, argp);
2236 case BTRFS_IOC_SNAP_CREATE: 2259 case BTRFS_IOC_SNAP_CREATE:
2237 return btrfs_ioctl_snap_create(file, argp, 0, 0); 2260 return btrfs_ioctl_snap_create(file, argp, 0, 0);
2238 case BTRFS_IOC_SNAP_CREATE_ASYNC: 2261 case BTRFS_IOC_SNAP_CREATE_V2:
2239 return btrfs_ioctl_snap_create(file, argp, 0, 1); 2262 return btrfs_ioctl_snap_create(file, argp, 0, 1);
2240 case BTRFS_IOC_SUBVOL_CREATE: 2263 case BTRFS_IOC_SUBVOL_CREATE:
2241 return btrfs_ioctl_snap_create(file, argp, 1, 0); 2264 return btrfs_ioctl_snap_create(file, argp, 1, 0);
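
One note on the clone hunk above: for a power-of-two block size bs, ALIGN() and IS_ALIGNED() expand to the same masks the old open-coded version used, and the rewritten check additionally requires destoff to be block aligned, which the old check did not. Illustration only, assuming bs is a power of two:

    u64 end_old = (src->i_size + bs - 1) & ~(bs - 1);   /* old open-coded rounding */
    u64 end_new = ALIGN(src->i_size, bs);               /* new helper, same value */

    /* IS_ALIGNED(x, bs) is ((x & (bs - 1)) == 0), so the rewritten check keeps
     * the old off and off+len tests and adds the missing destoff alignment test. */
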
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 17c99ebdf960..c344d12c646b 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -30,11 +30,15 @@ struct btrfs_ioctl_vol_args {
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
33#define BTRFS_SNAPSHOT_NAME_MAX 4079 33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
34struct btrfs_ioctl_async_vol_args { 34
35#define BTRFS_SUBVOL_NAME_MAX 4039
36struct btrfs_ioctl_vol_args_v2 {
35 __s64 fd; 37 __s64 fd;
36 __u64 transid; 38 __u64 transid;
37 char name[BTRFS_SNAPSHOT_NAME_MAX + 1]; 39 __u64 flags;
40 __u64 unused[4];
41 char name[BTRFS_SUBVOL_NAME_MAX + 1];
38}; 42};
39 43
40#define BTRFS_INO_LOOKUP_PATH_MAX 4080 44#define BTRFS_INO_LOOKUP_PATH_MAX 4080
@@ -187,6 +191,6 @@ struct btrfs_ioctl_space_args {
187 struct btrfs_ioctl_space_args) 191 struct btrfs_ioctl_space_args)
188#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) 192#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
189#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 193#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
190#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \ 194#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
191 struct btrfs_ioctl_async_vol_args) 195 struct btrfs_ioctl_vol_args_v2)
192#endif 196#endif
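
A hedged sketch of how userspace might drive the new BTRFS_IOC_SNAP_CREATE_V2 interface defined above, assuming the same calling convention as the v1 ioctl (issued on an fd for the destination directory, with args.fd naming the source subvolume); the helper name and fds are illustrative:

    #include <linux/types.h>
    #include <string.h>
    #include <sys/ioctl.h>
    /* plus the btrfs ioctl.h shown above for the struct and the ioctl number */

    static int snapshot_async(int destdir_fd, int src_fd, const char *name, __u64 *transid)
    {
        struct btrfs_ioctl_vol_args_v2 args;

        memset(&args, 0, sizeof(args));
        args.fd = src_fd;
        args.flags = BTRFS_SUBVOL_CREATE_ASYNC;         /* ask the kernel to return transid */
        strncpy(args.name, name, BTRFS_SUBVOL_NAME_MAX);

        if (ioctl(destdir_fd, BTRFS_IOC_SNAP_CREATE_V2, &args) < 0)
            return -1;
        *transid = args.transid;                        /* filled in only for the async case */
        return 0;
    }
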
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f4621f6deca1..ae7737e352c9 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -250,6 +250,73 @@ int btrfs_add_ordered_sum(struct inode *inode,
250 250
251/* 251/*
252 * this is used to account for finished IO across a given range 252 * this is used to account for finished IO across a given range
253 * of the file. The IO may span ordered extents. If
254 * a given ordered_extent is completely done, 1 is returned, otherwise
255 * 0.
256 *
257 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
258 * to make sure this function only returns 1 once for a given ordered extent.
259 *
260 * file_offset is updated to one byte past the range that is recorded as
261 * complete. This allows you to walk forward in the file.
262 */
263int btrfs_dec_test_first_ordered_pending(struct inode *inode,
264 struct btrfs_ordered_extent **cached,
265 u64 *file_offset, u64 io_size)
266{
267 struct btrfs_ordered_inode_tree *tree;
268 struct rb_node *node;
269 struct btrfs_ordered_extent *entry = NULL;
270 int ret;
271 u64 dec_end;
272 u64 dec_start;
273 u64 to_dec;
274
275 tree = &BTRFS_I(inode)->ordered_tree;
276 spin_lock(&tree->lock);
277 node = tree_search(tree, *file_offset);
278 if (!node) {
279 ret = 1;
280 goto out;
281 }
282
283 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
284 if (!offset_in_entry(entry, *file_offset)) {
285 ret = 1;
286 goto out;
287 }
288
289 dec_start = max(*file_offset, entry->file_offset);
290 dec_end = min(*file_offset + io_size, entry->file_offset +
291 entry->len);
292 *file_offset = dec_end;
293 if (dec_start > dec_end) {
294 printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
295 (unsigned long long)dec_start,
296 (unsigned long long)dec_end);
297 }
298 to_dec = dec_end - dec_start;
299 if (to_dec > entry->bytes_left) {
300 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
301 (unsigned long long)entry->bytes_left,
302 (unsigned long long)to_dec);
303 }
304 entry->bytes_left -= to_dec;
305 if (entry->bytes_left == 0)
306 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
307 else
308 ret = 1;
309out:
310 if (!ret && cached && entry) {
311 *cached = entry;
312 atomic_inc(&entry->refs);
313 }
314 spin_unlock(&tree->lock);
315 return ret == 0;
316}
317
318/*
319 * this is used to account for finished IO across a given range
253 * of the file. The IO should not span ordered extents. If 320 * of the file. The IO should not span ordered extents. If
254 * a given ordered_extent is completely done, 1 is returned, otherwise 321 * a given ordered_extent is completely done, 1 is returned, otherwise
255 * 0. 322 * 0.
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8ac365492a3f..61dca83119dd 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
141int btrfs_dec_test_ordered_pending(struct inode *inode, 141int btrfs_dec_test_ordered_pending(struct inode *inode,
142 struct btrfs_ordered_extent **cached, 142 struct btrfs_ordered_extent **cached,
143 u64 file_offset, u64 io_size); 143 u64 file_offset, u64 io_size);
144int btrfs_dec_test_first_ordered_pending(struct inode *inode,
145 struct btrfs_ordered_extent **cached,
146 u64 *file_offset, u64 io_size);
144int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 147int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
145 u64 start, u64 len, u64 disk_len, int type); 148 u64 start, u64 len, u64 disk_len, int type);
146int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, 149int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 79cba5fbc28e..f8be250963a0 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
56 return -ENOMEM; 56 return -ENOMEM;
57 57
58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
59 if (ret) 59 if (ret < 0)
60 goto out; 60 goto out;
61 if (ret) {
62 ret = -ENOENT;
63 goto out;
64 }
61 65
62 ret = btrfs_del_item(trans, root, path); 66 ret = btrfs_del_item(trans, root, path);
63 67
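
The orphan.c change relies on btrfs_search_slot()'s return convention: a negative value is an error, 0 means the key was found, and a positive value means the key is absent (the path points at the insertion position). The old code jumped out on any non-zero return, so a missing item leaked a positive value back to the caller; the new code reports it as -ENOENT. In caller form:

    ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
    if (ret < 0)
        goto out;               /* real error from the search */
    if (ret > 0) {
        ret = -ENOENT;          /* key not in the tree: nothing to delete */
        goto out;
    }
    ret = btrfs_del_item(trans, root, path);
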
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 8299a25ffc8f..883c6fa1367e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -244,6 +244,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
244 case Opt_space_cache: 244 case Opt_space_cache:
245 printk(KERN_INFO "btrfs: enabling disk space caching\n"); 245 printk(KERN_INFO "btrfs: enabling disk space caching\n");
246 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 246 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
247 break;
247 case Opt_clear_cache: 248 case Opt_clear_cache:
248 printk(KERN_INFO "btrfs: force clearing of disk cache\n"); 249 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
249 btrfs_set_opt(info->mount_opt, CLEAR_CACHE); 250 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
@@ -562,12 +563,26 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
562 563
563static int btrfs_test_super(struct super_block *s, void *data) 564static int btrfs_test_super(struct super_block *s, void *data)
564{ 565{
565 struct btrfs_fs_devices *test_fs_devices = data; 566 struct btrfs_root *test_root = data;
566 struct btrfs_root *root = btrfs_sb(s); 567 struct btrfs_root *root = btrfs_sb(s);
567 568
568 return root->fs_info->fs_devices == test_fs_devices; 569 /*
570 * If this super block is going away, return false as it
571 * can't match as an existing super block.
572 */
573 if (!atomic_read(&s->s_active))
574 return 0;
575 return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
576}
577
578static int btrfs_set_super(struct super_block *s, void *data)
579{
580 s->s_fs_info = data;
581
582 return set_anon_super(s, data);
569} 583}
570 584
585
571/* 586/*
572 * Find a superblock for the given device / mount point. 587 * Find a superblock for the given device / mount point.
573 * 588 *
@@ -581,6 +596,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
581 struct super_block *s; 596 struct super_block *s;
582 struct dentry *root; 597 struct dentry *root;
583 struct btrfs_fs_devices *fs_devices = NULL; 598 struct btrfs_fs_devices *fs_devices = NULL;
599 struct btrfs_root *tree_root = NULL;
600 struct btrfs_fs_info *fs_info = NULL;
584 fmode_t mode = FMODE_READ; 601 fmode_t mode = FMODE_READ;
585 char *subvol_name = NULL; 602 char *subvol_name = NULL;
586 u64 subvol_objectid = 0; 603 u64 subvol_objectid = 0;
@@ -608,8 +625,24 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
608 goto error_close_devices; 625 goto error_close_devices;
609 } 626 }
610 627
628 /*
629 * Set up a dummy root and fs_info for the test/set super callbacks. We
630 * don't actually fill these in until open_ctree, but we need them for
631 * searching for existing supers, so this lets us do that; open_ctree
632 * will properly initialize everything later.
633 */
634 fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
635 tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
636 if (!fs_info || !tree_root) {
637 error = -ENOMEM;
638 goto error_close_devices;
639 }
640 fs_info->tree_root = tree_root;
641 fs_info->fs_devices = fs_devices;
642 tree_root->fs_info = fs_info;
643
611 bdev = fs_devices->latest_bdev; 644 bdev = fs_devices->latest_bdev;
612 s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices); 645 s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
613 if (IS_ERR(s)) 646 if (IS_ERR(s))
614 goto error_s; 647 goto error_s;
615 648
@@ -652,9 +685,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
652 mutex_unlock(&root->d_inode->i_mutex); 685 mutex_unlock(&root->d_inode->i_mutex);
653 686
654 if (IS_ERR(new_root)) { 687 if (IS_ERR(new_root)) {
688 dput(root);
655 deactivate_locked_super(s); 689 deactivate_locked_super(s);
656 error = PTR_ERR(new_root); 690 error = PTR_ERR(new_root);
657 dput(root);
658 goto error_free_subvol_name; 691 goto error_free_subvol_name;
659 } 692 }
660 if (!new_root->d_inode) { 693 if (!new_root->d_inode) {
@@ -675,6 +708,8 @@ error_s:
675 error = PTR_ERR(s); 708 error = PTR_ERR(s);
676error_close_devices: 709error_close_devices:
677 btrfs_close_devices(fs_devices); 710 btrfs_close_devices(fs_devices);
711 kfree(fs_info);
712 kfree(tree_root);
678error_free_subvol_name: 713error_free_subvol_name:
679 kfree(subvol_name); 714 kfree(subvol_name);
680 return ERR_PTR(error); 715 return ERR_PTR(error);
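
For context on why btrfs_set_super() stores s_fs_info: sget() runs the test callback against every existing super block and only calls the set callback when nothing matched, so a second mount of the same fs_devices can find the dummy fs_info through btrfs_test_super() before open_ctree has filled anything else in. Roughly how the callbacks plug together in the hunk above (error handling trimmed):

    s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
    if (IS_ERR(s)) {
        /* allocation or test/set failure */
    } else if (s->s_root) {
        /* matched an existing btrfs super block for these fs_devices */
    } else {
        /* new super block: btrfs_set_super() set s->s_fs_info = tree_root,
         * and open_ctree() will initialize the rest */
    }
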
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 1fffbc017bdf..f50e931fc217 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -902,6 +902,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
902 struct btrfs_root *root = pending->root; 902 struct btrfs_root *root = pending->root;
903 struct btrfs_root *parent_root; 903 struct btrfs_root *parent_root;
904 struct inode *parent_inode; 904 struct inode *parent_inode;
905 struct dentry *parent;
905 struct dentry *dentry; 906 struct dentry *dentry;
906 struct extent_buffer *tmp; 907 struct extent_buffer *tmp;
907 struct extent_buffer *old; 908 struct extent_buffer *old;
@@ -941,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
941 trans->block_rsv = &pending->block_rsv; 942 trans->block_rsv = &pending->block_rsv;
942 943
943 dentry = pending->dentry; 944 dentry = pending->dentry;
944 parent_inode = dentry->d_parent->d_inode; 945 parent = dget_parent(dentry);
946 parent_inode = parent->d_inode;
945 parent_root = BTRFS_I(parent_inode)->root; 947 parent_root = BTRFS_I(parent_inode)->root;
946 record_root_in_trans(trans, parent_root); 948 record_root_in_trans(trans, parent_root);
947 949
@@ -989,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
989 parent_inode->i_ino, index, 991 parent_inode->i_ino, index,
990 dentry->d_name.name, dentry->d_name.len); 992 dentry->d_name.name, dentry->d_name.len);
991 BUG_ON(ret); 993 BUG_ON(ret);
994 dput(parent);
992 995
993 key.offset = (u64)-1; 996 key.offset = (u64)-1;
994 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); 997 pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index a29f19384a27..054744ac5719 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2869,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
2869{ 2869{
2870 int ret = 0; 2870 int ret = 0;
2871 struct btrfs_root *root; 2871 struct btrfs_root *root;
2872 struct dentry *old_parent = NULL;
2872 2873
2873 /* 2874 /*
2874 * for regular files, if its inode is already on disk, we don't 2875 * for regular files, if its inode is already on disk, we don't
@@ -2910,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
2910 if (IS_ROOT(parent)) 2911 if (IS_ROOT(parent))
2911 break; 2912 break;
2912 2913
2913 parent = parent->d_parent; 2914 parent = dget_parent(parent);
2915 dput(old_parent);
2916 old_parent = parent;
2914 inode = parent->d_inode; 2917 inode = parent->d_inode;
2915 2918
2916 } 2919 }
2920 dput(old_parent);
2917out: 2921out:
2918 return ret; 2922 return ret;
2919} 2923}
@@ -2945,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
2945{ 2949{
2946 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; 2950 int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
2947 struct super_block *sb; 2951 struct super_block *sb;
2952 struct dentry *old_parent = NULL;
2948 int ret = 0; 2953 int ret = 0;
2949 u64 last_committed = root->fs_info->last_trans_committed; 2954 u64 last_committed = root->fs_info->last_trans_committed;
2950 2955
@@ -3016,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3016 if (IS_ROOT(parent)) 3021 if (IS_ROOT(parent))
3017 break; 3022 break;
3018 3023
3019 parent = parent->d_parent; 3024 parent = dget_parent(parent);
3025 dput(old_parent);
3026 old_parent = parent;
3020 } 3027 }
3021 ret = 0; 3028 ret = 0;
3022end_trans: 3029end_trans:
3030 dput(old_parent);
3023 if (ret < 0) { 3031 if (ret < 0) {
3024 BUG_ON(ret != -ENOSPC); 3032 BUG_ON(ret != -ENOSPC);
3025 root->fs_info->last_trans_log_full_commit = trans->transid; 3033 root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -3039,8 +3047,13 @@ end_no_trans:
3039int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, 3047int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
3040 struct btrfs_root *root, struct dentry *dentry) 3048 struct btrfs_root *root, struct dentry *dentry)
3041{ 3049{
3042 return btrfs_log_inode_parent(trans, root, dentry->d_inode, 3050 struct dentry *parent = dget_parent(dentry);
3043 dentry->d_parent, 0); 3051 int ret;
3052
3053 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
3054 dput(parent);
3055
3056 return ret;
3044} 3057}
3045 3058
3046/* 3059/*
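
Both tree-log.c loops above adopt the same idiom for walking up the directory tree without a global dcache lock: pin each parent with dget_parent() and drop the previously pinned one as the walk advances. Reduced to a skeleton (do_one_level() is a placeholder):

    static void walk_up(struct dentry *dentry)
    {
        struct dentry *old_parent;
        struct dentry *parent = dget_parent(dentry);

        while (!IS_ROOT(parent)) {
            do_one_level(parent->d_inode);

            old_parent = parent;
            parent = dget_parent(old_parent);    /* pin the next level up */
            dput(old_parent);                    /* release the previous one */
        }
        dput(parent);
    }
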
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cc04dc1445d6..6b9884507837 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path,
412 412
413 device->fs_devices = fs_devices; 413 device->fs_devices = fs_devices;
414 fs_devices->num_devices++; 414 fs_devices->num_devices++;
415 } else if (strcmp(device->name, path)) { 415 } else if (!device->name || strcmp(device->name, path)) {
416 name = kstrdup(path, GFP_NOFS); 416 name = kstrdup(path, GFP_NOFS);
417 if (!name) 417 if (!name)
418 return -ENOMEM; 418 return -ENOMEM;
419 kfree(device->name); 419 kfree(device->name);
420 device->name = name; 420 device->name = name;
421 if (device->missing) {
422 fs_devices->missing_devices--;
423 device->missing = 0;
424 }
421 } 425 }
422 426
423 if (found_transid > fs_devices->latest_trans) { 427 if (found_transid > fs_devices->latest_trans) {
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1236 1240
1237 device->fs_devices->num_devices--; 1241 device->fs_devices->num_devices--;
1238 1242
1243 if (device->missing)
1244 root->fs_info->fs_devices->missing_devices--;
1245
1239 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1246 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1240 struct btrfs_device, dev_list); 1247 struct btrfs_device, dev_list);
1241 if (device->bdev == root->fs_info->sb->s_bdev) 1248 if (device->bdev == root->fs_info->sb->s_bdev)
@@ -3080,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
3080 device->devid = devid; 3087 device->devid = devid;
3081 device->work.func = pending_bios_fn; 3088 device->work.func = pending_bios_fn;
3082 device->fs_devices = fs_devices; 3089 device->fs_devices = fs_devices;
3090 device->missing = 1;
3083 fs_devices->num_devices++; 3091 fs_devices->num_devices++;
3092 fs_devices->missing_devices++;
3084 spin_lock_init(&device->io_lock); 3093 spin_lock_init(&device->io_lock);
3085 INIT_LIST_HEAD(&device->dev_alloc_list); 3094 INIT_LIST_HEAD(&device->dev_alloc_list);
3086 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); 3095 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3278,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root,
3278 device = add_missing_dev(root, devid, dev_uuid); 3287 device = add_missing_dev(root, devid, dev_uuid);
3279 if (!device) 3288 if (!device)
3280 return -ENOMEM; 3289 return -ENOMEM;
3290 } else if (!device->missing) {
3291 /*
3292 * this happens when a device that was properly set up
3293 * in the device info lists suddenly goes bad.
3294 * device->bdev is NULL, and so we have to set
3295 * device->missing to one here
3296 */
3297 root->fs_info->fs_devices->missing_devices++;
3298 device->missing = 1;
3281 } 3299 }
3282 } 3300 }
3283 3301
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b638b6e4eea..2740db49eb04 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -44,6 +44,7 @@ struct btrfs_device {
44 44
45 int writeable; 45 int writeable;
46 int in_fs_metadata; 46 int in_fs_metadata;
47 int missing;
47 48
48 spinlock_t io_lock; 49 spinlock_t io_lock;
49 50
@@ -93,6 +94,7 @@ struct btrfs_fs_devices {
93 u64 num_devices; 94 u64 num_devices;
94 u64 open_devices; 95 u64 open_devices;
95 u64 rw_devices; 96 u64 rw_devices;
97 u64 missing_devices;
96 u64 total_rw_bytes; 98 u64 total_rw_bytes;
97 struct block_device *latest_bdev; 99 struct block_device *latest_bdev;
98 100
diff --git a/fs/buffer.c b/fs/buffer.c
index 5930e382959b..2219a76e2caf 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1270,12 +1270,10 @@ static inline void check_irqs_on(void)
1270static void bh_lru_install(struct buffer_head *bh) 1270static void bh_lru_install(struct buffer_head *bh)
1271{ 1271{
1272 struct buffer_head *evictee = NULL; 1272 struct buffer_head *evictee = NULL;
1273 struct bh_lru *lru;
1274 1273
1275 check_irqs_on(); 1274 check_irqs_on();
1276 bh_lru_lock(); 1275 bh_lru_lock();
1277 lru = &__get_cpu_var(bh_lrus); 1276 if (__this_cpu_read(bh_lrus.bhs[0]) != bh) {
1278 if (lru->bhs[0] != bh) {
1279 struct buffer_head *bhs[BH_LRU_SIZE]; 1277 struct buffer_head *bhs[BH_LRU_SIZE];
1280 int in; 1278 int in;
1281 int out = 0; 1279 int out = 0;
@@ -1283,7 +1281,8 @@ static void bh_lru_install(struct buffer_head *bh)
1283 get_bh(bh); 1281 get_bh(bh);
1284 bhs[out++] = bh; 1282 bhs[out++] = bh;
1285 for (in = 0; in < BH_LRU_SIZE; in++) { 1283 for (in = 0; in < BH_LRU_SIZE; in++) {
1286 struct buffer_head *bh2 = lru->bhs[in]; 1284 struct buffer_head *bh2 =
1285 __this_cpu_read(bh_lrus.bhs[in]);
1287 1286
1288 if (bh2 == bh) { 1287 if (bh2 == bh) {
1289 __brelse(bh2); 1288 __brelse(bh2);
@@ -1298,7 +1297,7 @@ static void bh_lru_install(struct buffer_head *bh)
1298 } 1297 }
1299 while (out < BH_LRU_SIZE) 1298 while (out < BH_LRU_SIZE)
1300 bhs[out++] = NULL; 1299 bhs[out++] = NULL;
1301 memcpy(lru->bhs, bhs, sizeof(bhs)); 1300 memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
1302 } 1301 }
1303 bh_lru_unlock(); 1302 bh_lru_unlock();
1304 1303
@@ -1313,23 +1312,22 @@ static struct buffer_head *
1313lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size) 1312lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
1314{ 1313{
1315 struct buffer_head *ret = NULL; 1314 struct buffer_head *ret = NULL;
1316 struct bh_lru *lru;
1317 unsigned int i; 1315 unsigned int i;
1318 1316
1319 check_irqs_on(); 1317 check_irqs_on();
1320 bh_lru_lock(); 1318 bh_lru_lock();
1321 lru = &__get_cpu_var(bh_lrus);
1322 for (i = 0; i < BH_LRU_SIZE; i++) { 1319 for (i = 0; i < BH_LRU_SIZE; i++) {
1323 struct buffer_head *bh = lru->bhs[i]; 1320 struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
1324 1321
1325 if (bh && bh->b_bdev == bdev && 1322 if (bh && bh->b_bdev == bdev &&
1326 bh->b_blocknr == block && bh->b_size == size) { 1323 bh->b_blocknr == block && bh->b_size == size) {
1327 if (i) { 1324 if (i) {
1328 while (i) { 1325 while (i) {
1329 lru->bhs[i] = lru->bhs[i - 1]; 1326 __this_cpu_write(bh_lrus.bhs[i],
1327 __this_cpu_read(bh_lrus.bhs[i - 1]));
1330 i--; 1328 i--;
1331 } 1329 }
1332 lru->bhs[0] = bh; 1330 __this_cpu_write(bh_lrus.bhs[0], bh);
1333 } 1331 }
1334 get_bh(bh); 1332 get_bh(bh);
1335 ret = bh; 1333 ret = bh;
@@ -3203,22 +3201,23 @@ static void recalc_bh_state(void)
3203 int i; 3201 int i;
3204 int tot = 0; 3202 int tot = 0;
3205 3203
3206 if (__get_cpu_var(bh_accounting).ratelimit++ < 4096) 3204 if (__this_cpu_inc_return(bh_accounting.ratelimit) - 1 < 4096)
3207 return; 3205 return;
3208 __get_cpu_var(bh_accounting).ratelimit = 0; 3206 __this_cpu_write(bh_accounting.ratelimit, 0);
3209 for_each_online_cpu(i) 3207 for_each_online_cpu(i)
3210 tot += per_cpu(bh_accounting, i).nr; 3208 tot += per_cpu(bh_accounting, i).nr;
3211 buffer_heads_over_limit = (tot > max_buffer_heads); 3209 buffer_heads_over_limit = (tot > max_buffer_heads);
3212} 3210}
3213 3211
3214struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) 3212struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
3215{ 3213{
3216 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); 3214 struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
3217 if (ret) { 3215 if (ret) {
3218 INIT_LIST_HEAD(&ret->b_assoc_buffers); 3216 INIT_LIST_HEAD(&ret->b_assoc_buffers);
3219 get_cpu_var(bh_accounting).nr++; 3217 preempt_disable();
3218 __this_cpu_inc(bh_accounting.nr);
3220 recalc_bh_state(); 3219 recalc_bh_state();
3221 put_cpu_var(bh_accounting); 3220 preempt_enable();
3222 } 3221 }
3223 return ret; 3222 return ret;
3224} 3223}
@@ -3228,9 +3227,10 @@ void free_buffer_head(struct buffer_head *bh)
3228{ 3227{
3229 BUG_ON(!list_empty(&bh->b_assoc_buffers)); 3228 BUG_ON(!list_empty(&bh->b_assoc_buffers));
3230 kmem_cache_free(bh_cachep, bh); 3229 kmem_cache_free(bh_cachep, bh);
3231 get_cpu_var(bh_accounting).nr--; 3230 preempt_disable();
3231 __this_cpu_dec(bh_accounting.nr);
3232 recalc_bh_state(); 3232 recalc_bh_state();
3233 put_cpu_var(bh_accounting); 3233 preempt_enable();
3234} 3234}
3235EXPORT_SYMBOL(free_buffer_head); 3235EXPORT_SYMBOL(free_buffer_head);
3236 3236
@@ -3243,9 +3243,8 @@ static void buffer_exit_cpu(int cpu)
3243 brelse(b->bhs[i]); 3243 brelse(b->bhs[i]);
3244 b->bhs[i] = NULL; 3244 b->bhs[i] = NULL;
3245 } 3245 }
3246 get_cpu_var(bh_accounting).nr += per_cpu(bh_accounting, cpu).nr; 3246 this_cpu_add(bh_accounting.nr, per_cpu(bh_accounting, cpu).nr);
3247 per_cpu(bh_accounting, cpu).nr = 0; 3247 per_cpu(bh_accounting, cpu).nr = 0;
3248 put_cpu_var(bh_accounting);
3249} 3248}
3250 3249
3251static int buffer_cpu_notify(struct notifier_block *self, 3250static int buffer_cpu_notify(struct notifier_block *self,
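
The buffer.c hunks are a mechanical conversion from taking a per-CPU lvalue with __get_cpu_var()/get_cpu_var() to the this_cpu accessor family, which reads or updates a single field of a per-CPU variable in one step; the __this_cpu_* forms leave preemption handling to the caller, which is why the alloc/free paths grow explicit preempt_disable()/preempt_enable() pairs. Side by side, using the bh_accounting counter from the hunk:

    /* old style: pin the CPU, take the per-CPU lvalue, release */
    get_cpu_var(bh_accounting).nr++;
    recalc_bh_state();
    put_cpu_var(bh_accounting);

    /* new style: caller handles preemption, accessor touches one field */
    preempt_disable();
    __this_cpu_inc(bh_accounting.nr);
    recalc_bh_state();
    preempt_enable();

    /* single-field reads and writes follow the same shape */
    struct buffer_head *first = __this_cpu_read(bh_lrus.bhs[0]);
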
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e9c874abc9e1..561438b6a50c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 204 err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
205 page->index << PAGE_CACHE_SHIFT, &len, 205 page->index << PAGE_CACHE_SHIFT, &len,
206 ci->i_truncate_seq, ci->i_truncate_size, 206 ci->i_truncate_seq, ci->i_truncate_size,
207 &page, 1); 207 &page, 1, 0);
208 if (err == -ENOENT) 208 if (err == -ENOENT)
209 err = 0; 209 err = 0;
210 if (err < 0) { 210 if (err < 0) {
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
287 rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout, 287 rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
288 offset, &len, 288 offset, &len,
289 ci->i_truncate_seq, ci->i_truncate_size, 289 ci->i_truncate_seq, ci->i_truncate_size,
290 pages, nr_pages); 290 pages, nr_pages, 0);
291 if (rc == -ENOENT) 291 if (rc == -ENOENT)
292 rc = 0; 292 rc = 0;
293 if (rc < 0) 293 if (rc < 0)
@@ -774,7 +774,7 @@ get_more_pages:
774 snapc, do_sync, 774 snapc, do_sync,
775 ci->i_truncate_seq, 775 ci->i_truncate_seq,
776 ci->i_truncate_size, 776 ci->i_truncate_size,
777 &inode->i_mtime, true, 1); 777 &inode->i_mtime, true, 1, 0);
778 max_pages = req->r_num_pages; 778 max_pages = req->r_num_pages;
779 779
780 alloc_page_vec(fsc, req); 780 alloc_page_vec(fsc, req);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 98ab13e2b71d..60d27bc9eb83 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
1430 invalidating_gen == ci->i_rdcache_gen) { 1430 invalidating_gen == ci->i_rdcache_gen) {
1431 /* success. */ 1431 /* success. */
1432 dout("try_nonblocking_invalidate %p success\n", inode); 1432 dout("try_nonblocking_invalidate %p success\n", inode);
1433 ci->i_rdcache_gen = 0; 1433 /* save any racing async invalidate some trouble */
1434 ci->i_rdcache_revoking = 0; 1434 ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
1435 return 0; 1435 return 0;
1436 } 1436 }
1437 dout("try_nonblocking_invalidate %p failed\n", inode); 1437 dout("try_nonblocking_invalidate %p failed\n", inode);
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2273{ 2273{
2274 struct ceph_inode_info *ci = ceph_inode(inode); 2274 struct ceph_inode_info *ci = ceph_inode(inode);
2275 int mds = session->s_mds; 2275 int mds = session->s_mds;
2276 unsigned seq = le32_to_cpu(grant->seq); 2276 int seq = le32_to_cpu(grant->seq);
2277 unsigned issue_seq = le32_to_cpu(grant->issue_seq);
2278 int newcaps = le32_to_cpu(grant->caps); 2277 int newcaps = le32_to_cpu(grant->caps);
2279 int issued, implemented, used, wanted, dirty; 2278 int issued, implemented, used, wanted, dirty;
2280 u64 size = le64_to_cpu(grant->size); 2279 u64 size = le64_to_cpu(grant->size);
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2286 int revoked_rdcache = 0; 2285 int revoked_rdcache = 0;
2287 int queue_invalidate = 0; 2286 int queue_invalidate = 0;
2288 2287
2289 dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", 2288 dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
2290 inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); 2289 inode, cap, mds, seq, ceph_cap_string(newcaps));
2291 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, 2290 dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
2292 inode->i_size); 2291 inode->i_size);
2293 2292
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2383 } 2382 }
2384 2383
2385 cap->seq = seq; 2384 cap->seq = seq;
2386 cap->issue_seq = issue_seq;
2387 2385
2388 /* file layout may have changed */ 2386 /* file layout may have changed */
2389 ci->i_layout = grant->layout; 2387 ci->i_layout = grant->layout;
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2691 NULL /* no caps context */); 2689 NULL /* no caps context */);
2692 try_flush_caps(inode, session, NULL); 2690 try_flush_caps(inode, session, NULL);
2693 up_read(&mdsc->snap_rwsem); 2691 up_read(&mdsc->snap_rwsem);
2692
2693 /* make sure we re-request max_size, if necessary */
2694 spin_lock(&inode->i_lock);
2695 ci->i_requested_max_size = 0;
2696 spin_unlock(&inode->i_lock);
2694} 2697}
2695 2698
2696/* 2699/*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e0a2dc6fcafc..fa7ca04ee816 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,12 +40,13 @@ int ceph_init_dentry(struct dentry *dentry)
40 if (dentry->d_fsdata) 40 if (dentry->d_fsdata)
41 return 0; 41 return 0;
42 42
43 if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) 43 if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
44 dentry->d_op = &ceph_dentry_ops; 44 ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
45 d_set_d_op(dentry, &ceph_dentry_ops);
45 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) 46 else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
46 dentry->d_op = &ceph_snapdir_dentry_ops; 47 d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
47 else 48 else
48 dentry->d_op = &ceph_snap_dentry_ops; 49 d_set_d_op(dentry, &ceph_snap_dentry_ops);
49 50
50 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); 51 di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
51 if (!di) 52 if (!di)
@@ -111,11 +112,11 @@ static int __dcache_readdir(struct file *filp,
111 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, 112 dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos,
112 last); 113 last);
113 114
114 spin_lock(&dcache_lock); 115 spin_lock(&parent->d_lock);
115 116
116 /* start at beginning? */ 117 /* start at beginning? */
117 if (filp->f_pos == 2 || (last && 118 if (filp->f_pos == 2 || last == NULL ||
118 filp->f_pos < ceph_dentry(last)->offset)) { 119 filp->f_pos < ceph_dentry(last)->offset) {
119 if (list_empty(&parent->d_subdirs)) 120 if (list_empty(&parent->d_subdirs))
120 goto out_unlock; 121 goto out_unlock;
121 p = parent->d_subdirs.prev; 122 p = parent->d_subdirs.prev;
@@ -135,6 +136,7 @@ more:
135 fi->at_end = 1; 136 fi->at_end = 1;
136 goto out_unlock; 137 goto out_unlock;
137 } 138 }
139 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
138 if (!d_unhashed(dentry) && dentry->d_inode && 140 if (!d_unhashed(dentry) && dentry->d_inode &&
139 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && 141 ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
140 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && 142 ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
@@ -144,13 +146,15 @@ more:
144 dentry->d_name.len, dentry->d_name.name, di->offset, 146 dentry->d_name.len, dentry->d_name.name, di->offset,
145 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", 147 filp->f_pos, d_unhashed(dentry) ? " unhashed" : "",
146 !dentry->d_inode ? " null" : ""); 148 !dentry->d_inode ? " null" : "");
149 spin_unlock(&dentry->d_lock);
147 p = p->prev; 150 p = p->prev;
148 dentry = list_entry(p, struct dentry, d_u.d_child); 151 dentry = list_entry(p, struct dentry, d_u.d_child);
149 di = ceph_dentry(dentry); 152 di = ceph_dentry(dentry);
150 } 153 }
151 154
152 atomic_inc(&dentry->d_count); 155 dget_dlock(dentry);
153 spin_unlock(&dcache_lock); 156 spin_unlock(&dentry->d_lock);
157 spin_unlock(&parent->d_lock);
154 158
155 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, 159 dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos,
156 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 160 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
@@ -176,19 +180,19 @@ more:
176 180
177 filp->f_pos++; 181 filp->f_pos++;
178 182
179 /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ 183 /* make sure a dentry wasn't dropped while we didn't have parent lock */
180 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { 184 if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
181 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); 185 dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
182 err = -EAGAIN; 186 err = -EAGAIN;
183 goto out; 187 goto out;
184 } 188 }
185 189
186 spin_lock(&dcache_lock); 190 spin_lock(&parent->d_lock);
187 p = p->prev; /* advance to next dentry */ 191 p = p->prev; /* advance to next dentry */
188 goto more; 192 goto more;
189 193
190out_unlock: 194out_unlock:
191 spin_unlock(&dcache_lock); 195 spin_unlock(&parent->d_lock);
192out: 196out:
193 if (last) 197 if (last)
194 dput(last); 198 dput(last);
@@ -336,7 +340,10 @@ more:
336 if (req->r_reply_info.dir_end) { 340 if (req->r_reply_info.dir_end) {
337 kfree(fi->last_name); 341 kfree(fi->last_name);
338 fi->last_name = NULL; 342 fi->last_name = NULL;
339 fi->next_offset = 2; 343 if (ceph_frag_is_rightmost(frag))
344 fi->next_offset = 2;
345 else
346 fi->next_offset = 0;
340 } else { 347 } else {
341 rinfo = &req->r_reply_info; 348 rinfo = &req->r_reply_info;
342 err = note_last_dentry(fi, 349 err = note_last_dentry(fi,
@@ -355,18 +362,22 @@ more:
355 u64 pos = ceph_make_fpos(frag, off); 362 u64 pos = ceph_make_fpos(frag, off);
356 struct ceph_mds_reply_inode *in = 363 struct ceph_mds_reply_inode *in =
357 rinfo->dir_in[off - fi->offset].in; 364 rinfo->dir_in[off - fi->offset].in;
365 struct ceph_vino vino;
366 ino_t ino;
367
358 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", 368 dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
359 off, off - fi->offset, rinfo->dir_nr, pos, 369 off, off - fi->offset, rinfo->dir_nr, pos,
360 rinfo->dir_dname_len[off - fi->offset], 370 rinfo->dir_dname_len[off - fi->offset],
361 rinfo->dir_dname[off - fi->offset], in); 371 rinfo->dir_dname[off - fi->offset], in);
362 BUG_ON(!in); 372 BUG_ON(!in);
363 ftype = le32_to_cpu(in->mode) >> 12; 373 ftype = le32_to_cpu(in->mode) >> 12;
374 vino.ino = le64_to_cpu(in->ino);
375 vino.snap = le64_to_cpu(in->snapid);
376 ino = ceph_vino_to_ino(vino);
364 if (filldir(dirent, 377 if (filldir(dirent,
365 rinfo->dir_dname[off - fi->offset], 378 rinfo->dir_dname[off - fi->offset],
366 rinfo->dir_dname_len[off - fi->offset], 379 rinfo->dir_dname_len[off - fi->offset],
367 pos, 380 pos, ino, ftype) < 0) {
368 le64_to_cpu(in->ino),
369 ftype) < 0) {
370 dout("filldir stopping us...\n"); 381 dout("filldir stopping us...\n");
371 return 0; 382 return 0;
372 } 383 }
@@ -414,6 +425,7 @@ static void reset_readdir(struct ceph_file_info *fi)
414 fi->last_readdir = NULL; 425 fi->last_readdir = NULL;
415 } 426 }
416 kfree(fi->last_name); 427 kfree(fi->last_name);
428 fi->last_name = NULL;
417 fi->next_offset = 2; /* compensate for . and .. */ 429 fi->next_offset = 2; /* compensate for . and .. */
418 if (fi->dentry) { 430 if (fi->dentry) {
419 dput(fi->dentry); 431 dput(fi->dentry);
@@ -978,7 +990,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
978 */ 990 */
979static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) 991static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
980{ 992{
981 struct inode *dir = dentry->d_parent->d_inode; 993 struct inode *dir;
994
995 if (nd->flags & LOOKUP_RCU)
996 return -ECHILD;
997
998 dir = dentry->d_parent->d_inode;
982 999
983 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, 1000 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
984 dentry->d_name.len, dentry->d_name.name, dentry->d_inode, 1001 dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e77c28cf3690..7d0e4a82d898 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
154 } 154 }
155 155
156 /* 156 /*
157 * No need to block if we have any caps. Update wanted set 157 * No need to block if we have caps on the auth MDS (for
158 * write) or any MDS (for read). Update wanted set
158 * asynchronously. 159 * asynchronously.
159 */ 160 */
160 spin_lock(&inode->i_lock); 161 spin_lock(&inode->i_lock);
161 if (__ceph_is_any_real_caps(ci)) { 162 if (__ceph_is_any_real_caps(ci) &&
163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
162 int mds_wanted = __ceph_caps_mds_wanted(ci); 164 int mds_wanted = __ceph_caps_mds_wanted(ci);
163 int issued = __ceph_caps_issued(ci, NULL); 165 int issued = __ceph_caps_issued(ci, NULL);
164 166
@@ -280,11 +282,13 @@ int ceph_release(struct inode *inode, struct file *file)
280static int striped_read(struct inode *inode, 282static int striped_read(struct inode *inode,
281 u64 off, u64 len, 283 u64 off, u64 len,
282 struct page **pages, int num_pages, 284 struct page **pages, int num_pages,
283 int *checkeof) 285 int *checkeof, bool align_to_pages,
286 unsigned long buf_align)
284{ 287{
285 struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 288 struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
286 struct ceph_inode_info *ci = ceph_inode(inode); 289 struct ceph_inode_info *ci = ceph_inode(inode);
287 u64 pos, this_len; 290 u64 pos, this_len;
291 int io_align, page_align;
288 int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ 292 int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
289 int left, pages_left; 293 int left, pages_left;
290 int read; 294 int read;
@@ -300,14 +304,19 @@ static int striped_read(struct inode *inode,
300 page_pos = pages; 304 page_pos = pages;
301 pages_left = num_pages; 305 pages_left = num_pages;
302 read = 0; 306 read = 0;
307 io_align = off & ~PAGE_MASK;
303 308
304more: 309more:
310 if (align_to_pages)
311 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
312 else
313 page_align = pos & ~PAGE_MASK;
305 this_len = left; 314 this_len = left;
306 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), 315 ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
307 &ci->i_layout, pos, &this_len, 316 &ci->i_layout, pos, &this_len,
308 ci->i_truncate_seq, 317 ci->i_truncate_seq,
309 ci->i_truncate_size, 318 ci->i_truncate_size,
310 page_pos, pages_left); 319 page_pos, pages_left, page_align);
311 hit_stripe = this_len < left; 320 hit_stripe = this_len < left;
312 was_short = ret >= 0 && ret < this_len; 321 was_short = ret >= 0 && ret < this_len;
313 if (ret == -ENOENT) 322 if (ret == -ENOENT)
@@ -368,32 +377,34 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
368 struct inode *inode = file->f_dentry->d_inode; 377 struct inode *inode = file->f_dentry->d_inode;
369 struct page **pages; 378 struct page **pages;
370 u64 off = *poff; 379 u64 off = *poff;
371 int num_pages = calc_pages_for(off, len); 380 int num_pages, ret;
372 int ret;
373 381
374 dout("sync_read on file %p %llu~%u %s\n", file, off, len, 382 dout("sync_read on file %p %llu~%u %s\n", file, off, len,
375 (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); 383 (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
376 384
377 if (file->f_flags & O_DIRECT) { 385 if (file->f_flags & O_DIRECT) {
378 pages = ceph_get_direct_page_vector(data, num_pages, off, len); 386 num_pages = calc_pages_for((unsigned long)data, len);
379 387 pages = ceph_get_direct_page_vector(data, num_pages, true);
380 /*
381 * flush any page cache pages in this range. this
382 * will make concurrent normal and O_DIRECT io slow,
383 * but it will at least behave sensibly when they are
384 * in sequence.
385 */
386 } else { 388 } else {
389 num_pages = calc_pages_for(off, len);
387 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS); 390 pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
388 } 391 }
389 if (IS_ERR(pages)) 392 if (IS_ERR(pages))
390 return PTR_ERR(pages); 393 return PTR_ERR(pages);
391 394
395 /*
396 * flush any page cache pages in this range. this
397 * will make concurrent normal and sync io slow,
398 * but it will at least behave sensibly when they are
399 * in sequence.
400 */
392 ret = filemap_write_and_wait(inode->i_mapping); 401 ret = filemap_write_and_wait(inode->i_mapping);
393 if (ret < 0) 402 if (ret < 0)
394 goto done; 403 goto done;
395 404
396 ret = striped_read(inode, off, len, pages, num_pages, checkeof); 405 ret = striped_read(inode, off, len, pages, num_pages, checkeof,
406 file->f_flags & O_DIRECT,
407 (unsigned long)data & ~PAGE_MASK);
397 408
398 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) 409 if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
399 ret = ceph_copy_page_vector_to_user(pages, data, off, ret); 410 ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -402,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
402 413
403done: 414done:
404 if (file->f_flags & O_DIRECT) 415 if (file->f_flags & O_DIRECT)
405 ceph_put_page_vector(pages, num_pages); 416 ceph_put_page_vector(pages, num_pages, true);
406 else 417 else
407 ceph_release_page_vector(pages, num_pages); 418 ceph_release_page_vector(pages, num_pages);
408 dout("sync_read result %d\n", ret); 419 dout("sync_read result %d\n", ret);
@@ -448,6 +459,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
448 int flags; 459 int flags;
449 int do_sync = 0; 460 int do_sync = 0;
450 int check_caps = 0; 461 int check_caps = 0;
462 int page_align, io_align;
463 unsigned long buf_align;
451 int ret; 464 int ret;
452 struct timespec mtime = CURRENT_TIME; 465 struct timespec mtime = CURRENT_TIME;
453 466
@@ -462,6 +475,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
462 else 475 else
463 pos = *offset; 476 pos = *offset;
464 477
478 io_align = pos & ~PAGE_MASK;
479 buf_align = (unsigned long)data & ~PAGE_MASK;
480
465 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); 481 ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
466 if (ret < 0) 482 if (ret < 0)
467 return ret; 483 return ret;
@@ -486,20 +502,27 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
486 */ 502 */
487more: 503more:
488 len = left; 504 len = left;
505 if (file->f_flags & O_DIRECT) {
506 /* write from beginning of first page, regardless of
507 io alignment */
508 page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
509 num_pages = calc_pages_for((unsigned long)data, len);
510 } else {
511 page_align = pos & ~PAGE_MASK;
512 num_pages = calc_pages_for(pos, len);
513 }
489 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, 514 req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
490 ceph_vino(inode), pos, &len, 515 ceph_vino(inode), pos, &len,
491 CEPH_OSD_OP_WRITE, flags, 516 CEPH_OSD_OP_WRITE, flags,
492 ci->i_snap_realm->cached_context, 517 ci->i_snap_realm->cached_context,
493 do_sync, 518 do_sync,
494 ci->i_truncate_seq, ci->i_truncate_size, 519 ci->i_truncate_seq, ci->i_truncate_size,
495 &mtime, false, 2); 520 &mtime, false, 2, page_align);
496 if (!req) 521 if (!req)
497 return -ENOMEM; 522 return -ENOMEM;
498 523
499 num_pages = calc_pages_for(pos, len);
500
501 if (file->f_flags & O_DIRECT) { 524 if (file->f_flags & O_DIRECT) {
502 pages = ceph_get_direct_page_vector(data, num_pages, pos, len); 525 pages = ceph_get_direct_page_vector(data, num_pages, false);
503 if (IS_ERR(pages)) { 526 if (IS_ERR(pages)) {
504 ret = PTR_ERR(pages); 527 ret = PTR_ERR(pages);
505 goto out; 528 goto out;
@@ -549,7 +572,7 @@ more:
549 } 572 }
550 573
551 if (file->f_flags & O_DIRECT) 574 if (file->f_flags & O_DIRECT)
552 ceph_put_page_vector(pages, num_pages); 575 ceph_put_page_vector(pages, num_pages, false);
553 else if (file->f_flags & O_SYNC) 576 else if (file->f_flags & O_SYNC)
554 ceph_release_page_vector(pages, num_pages); 577 ceph_release_page_vector(pages, num_pages);
555 578
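The O_DIRECT changes to striped_read() and ceph_sync_write() above align the page vector to the user buffer rather than to the file offset: io_align is the file position's offset within its page, buf_align the buffer's, and page_align tells the OSD layer where in the first page the data starts. A minimal user-space sketch of that arithmetic, assuming 4 KiB pages; PAGE_SIZE/PAGE_MASK and pages_spanned() are local stand-ins, not the kernel macros or ceph's calc_pages_for().

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

/* pages spanned by [off, off+len), analogous in spirit to calc_pages_for() */
static unsigned long pages_spanned(unsigned long off, unsigned long len)
{
	return ((off + len + PAGE_SIZE - 1) >> PAGE_SHIFT) - (off >> PAGE_SHIFT);
}

int main(void)
{
	unsigned long pos  = 5000;          /* file offset of the I/O   */
	unsigned long data = 0x7f0000ab30;  /* user buffer address      */
	unsigned long len  = 10000;

	unsigned long io_align  = pos & ~PAGE_MASK;   /* 5000 % 4096 = 904 */
	unsigned long buf_align = data & ~PAGE_MASK;  /* 0xb30 = 2864      */

	/* O_DIRECT: data lands in pages at the buffer's alignment */
	unsigned long page_align = (pos - io_align + buf_align) & ~PAGE_MASK;

	printf("io_align=%lu buf_align=%lu page_align=%lu pages=%lu\n",
	       io_align, buf_align, page_align, pages_spanned(data, len));
	return 0;
}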
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1d6a45b5a04c..e61de4f7b99d 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2,7 +2,6 @@
2 2
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/fs.h> 4#include <linux/fs.h>
5#include <linux/smp_lock.h>
6#include <linux/slab.h> 5#include <linux/slab.h>
7#include <linux/string.h> 6#include <linux/string.h>
8#include <linux/uaccess.h> 7#include <linux/uaccess.h>
@@ -369,6 +368,15 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
369 return &ci->vfs_inode; 368 return &ci->vfs_inode;
370} 369}
371 370
371static void ceph_i_callback(struct rcu_head *head)
372{
373 struct inode *inode = container_of(head, struct inode, i_rcu);
374 struct ceph_inode_info *ci = ceph_inode(inode);
375
376 INIT_LIST_HEAD(&inode->i_dentry);
377 kmem_cache_free(ceph_inode_cachep, ci);
378}
379
372void ceph_destroy_inode(struct inode *inode) 380void ceph_destroy_inode(struct inode *inode)
373{ 381{
374 struct ceph_inode_info *ci = ceph_inode(inode); 382 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -408,7 +416,7 @@ void ceph_destroy_inode(struct inode *inode)
408 if (ci->i_xattrs.prealloc_blob) 416 if (ci->i_xattrs.prealloc_blob)
409 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 417 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
410 418
411 kmem_cache_free(ceph_inode_cachep, ci); 419 call_rcu(&inode->i_rcu, ceph_i_callback);
412} 420}
413 421
414 422
@@ -471,7 +479,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
471 479
472 if (issued & (CEPH_CAP_FILE_EXCL| 480 if (issued & (CEPH_CAP_FILE_EXCL|
473 CEPH_CAP_FILE_WR| 481 CEPH_CAP_FILE_WR|
474 CEPH_CAP_FILE_BUFFER)) { 482 CEPH_CAP_FILE_BUFFER|
483 CEPH_CAP_AUTH_EXCL|
484 CEPH_CAP_XATTR_EXCL)) {
475 if (timespec_compare(ctime, &inode->i_ctime) > 0) { 485 if (timespec_compare(ctime, &inode->i_ctime) > 0) {
476 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n", 486 dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
477 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, 487 inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
@@ -511,7 +521,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
511 warn = 1; 521 warn = 1;
512 } 522 }
513 } else { 523 } else {
514 /* we have no write caps; whatever the MDS says is true */ 524 /* we have no write|excl caps; whatever the MDS says is true */
515 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { 525 if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
516 inode->i_ctime = *ctime; 526 inode->i_ctime = *ctime;
517 inode->i_mtime = *mtime; 527 inode->i_mtime = *mtime;
@@ -567,12 +577,17 @@ static int fill_inode(struct inode *inode,
567 577
568 /* 578 /*
569 * provided version will be odd if inode value is projected, 579 * provided version will be odd if inode value is projected,
570 * even if stable. skip the update if we have a newer info 580 * even if stable. skip the update if we have newer stable
571 * (e.g., due to inode info racing form multiple MDSs), or if 581 * info (ours>=theirs, e.g. due to racing mds replies), unless
572 * we are getting projected (unstable) inode info. 582 * we are getting projected (unstable) info (in which case the
583 * version is odd, and we want ours>theirs).
584 * us them
585 * 2 2 skip
586 * 3 2 skip
587 * 3 3 update
573 */ 588 */
574 if (le64_to_cpu(info->version) > 0 && 589 if (le64_to_cpu(info->version) > 0 &&
575 (ci->i_version & ~1) > le64_to_cpu(info->version)) 590 (ci->i_version & ~1) >= le64_to_cpu(info->version))
576 goto no_change; 591 goto no_change;
577 592
578 issued = __ceph_caps_issued(ci, &implemented); 593 issued = __ceph_caps_issued(ci, &implemented);
@@ -606,7 +621,14 @@ static int fill_inode(struct inode *inode,
606 le32_to_cpu(info->time_warp_seq), 621 le32_to_cpu(info->time_warp_seq),
607 &ctime, &mtime, &atime); 622 &ctime, &mtime, &atime);
608 623
609 ci->i_max_size = le64_to_cpu(info->max_size); 624 /* only update max_size on auth cap */
625 if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
626 ci->i_max_size != le64_to_cpu(info->max_size)) {
627 dout("max_size %lld -> %llu\n", ci->i_max_size,
628 le64_to_cpu(info->max_size));
629 ci->i_max_size = le64_to_cpu(info->max_size);
630 }
631
610 ci->i_layout = info->layout; 632 ci->i_layout = info->layout;
611 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 633 inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
612 634
@@ -828,13 +850,13 @@ static void ceph_set_dentry_offset(struct dentry *dn)
828 di->offset = ceph_inode(inode)->i_max_offset++; 850 di->offset = ceph_inode(inode)->i_max_offset++;
829 spin_unlock(&inode->i_lock); 851 spin_unlock(&inode->i_lock);
830 852
831 spin_lock(&dcache_lock); 853 spin_lock(&dir->d_lock);
832 spin_lock(&dn->d_lock); 854 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
833 list_move(&dn->d_u.d_child, &dir->d_subdirs); 855 list_move(&dn->d_u.d_child, &dir->d_subdirs);
834 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, 856 dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
835 dn->d_u.d_child.prev, dn->d_u.d_child.next); 857 dn->d_u.d_child.prev, dn->d_u.d_child.next);
836 spin_unlock(&dn->d_lock); 858 spin_unlock(&dn->d_lock);
837 spin_unlock(&dcache_lock); 859 spin_unlock(&dir->d_lock);
838} 860}
839 861
840/* 862/*
@@ -866,8 +888,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
866 } else if (realdn) { 888 } else if (realdn) {
867 dout("dn %p (%d) spliced with %p (%d) " 889 dout("dn %p (%d) spliced with %p (%d) "
868 "inode %p ino %llx.%llx\n", 890 "inode %p ino %llx.%llx\n",
869 dn, atomic_read(&dn->d_count), 891 dn, dn->d_count,
870 realdn, atomic_read(&realdn->d_count), 892 realdn, realdn->d_count,
871 realdn->d_inode, ceph_vinop(realdn->d_inode)); 893 realdn->d_inode, ceph_vinop(realdn->d_inode));
872 dput(dn); 894 dput(dn);
873 dn = realdn; 895 dn = realdn;
@@ -1055,7 +1077,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1055 ininfo = rinfo->targeti.in; 1077 ininfo = rinfo->targeti.in;
1056 vino.ino = le64_to_cpu(ininfo->ino); 1078 vino.ino = le64_to_cpu(ininfo->ino);
1057 vino.snap = le64_to_cpu(ininfo->snapid); 1079 vino.snap = le64_to_cpu(ininfo->snapid);
1058 if (!dn->d_inode) { 1080 in = dn->d_inode;
1081 if (!in) {
1059 in = ceph_get_inode(sb, vino); 1082 in = ceph_get_inode(sb, vino);
1060 if (IS_ERR(in)) { 1083 if (IS_ERR(in)) {
1061 pr_err("fill_trace bad get_inode " 1084 pr_err("fill_trace bad get_inode "
@@ -1217,11 +1240,11 @@ retry_lookup:
1217 goto retry_lookup; 1240 goto retry_lookup;
1218 } else { 1241 } else {
1219 /* reorder parent's d_subdirs */ 1242 /* reorder parent's d_subdirs */
1220 spin_lock(&dcache_lock); 1243 spin_lock(&parent->d_lock);
1221 spin_lock(&dn->d_lock); 1244 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
1222 list_move(&dn->d_u.d_child, &parent->d_subdirs); 1245 list_move(&dn->d_u.d_child, &parent->d_subdirs);
1223 spin_unlock(&dn->d_lock); 1246 spin_unlock(&dn->d_lock);
1224 spin_unlock(&dcache_lock); 1247 spin_unlock(&parent->d_lock);
1225 } 1248 }
1226 1249
1227 di = dn->d_fsdata; 1250 di = dn->d_fsdata;
@@ -1386,11 +1409,8 @@ static void ceph_invalidate_work(struct work_struct *work)
1386 spin_lock(&inode->i_lock); 1409 spin_lock(&inode->i_lock);
1387 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1410 dout("invalidate_pages %p gen %d revoking %d\n", inode,
1388 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1411 ci->i_rdcache_gen, ci->i_rdcache_revoking);
1389 if (ci->i_rdcache_gen == 0 || 1412 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1390 ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1391 BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
1392 /* nevermind! */ 1413 /* nevermind! */
1393 ci->i_rdcache_revoking = 0;
1394 spin_unlock(&inode->i_lock); 1414 spin_unlock(&inode->i_lock);
1395 goto out; 1415 goto out;
1396 } 1416 }
@@ -1400,15 +1420,16 @@ static void ceph_invalidate_work(struct work_struct *work)
1400 ceph_invalidate_nondirty_pages(inode->i_mapping); 1420 ceph_invalidate_nondirty_pages(inode->i_mapping);
1401 1421
1402 spin_lock(&inode->i_lock); 1422 spin_lock(&inode->i_lock);
1403 if (orig_gen == ci->i_rdcache_gen) { 1423 if (orig_gen == ci->i_rdcache_gen &&
1424 orig_gen == ci->i_rdcache_revoking) {
1404 dout("invalidate_pages %p gen %d successful\n", inode, 1425 dout("invalidate_pages %p gen %d successful\n", inode,
1405 ci->i_rdcache_gen); 1426 ci->i_rdcache_gen);
1406 ci->i_rdcache_gen = 0; 1427 ci->i_rdcache_revoking--;
1407 ci->i_rdcache_revoking = 0;
1408 check = 1; 1428 check = 1;
1409 } else { 1429 } else {
1410 dout("invalidate_pages %p gen %d raced, gen now %d\n", 1430 dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
1411 inode, orig_gen, ci->i_rdcache_gen); 1431 inode, orig_gen, ci->i_rdcache_gen,
1432 ci->i_rdcache_revoking);
1412 } 1433 }
1413 spin_unlock(&inode->i_lock); 1434 spin_unlock(&inode->i_lock);
1414 1435
@@ -1739,7 +1760,7 @@ int ceph_do_getattr(struct inode *inode, int mask)
1739 return 0; 1760 return 0;
1740 } 1761 }
1741 1762
1742 dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask)); 1763 dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
1743 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1)) 1764 if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
1744 return 0; 1765 return 0;
1745 1766
@@ -1760,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
1760 * Check inode permissions. We verify we have a valid value for 1781 * Check inode permissions. We verify we have a valid value for
1761 * the AUTH cap, then call the generic handler. 1782 * the AUTH cap, then call the generic handler.
1762 */ 1783 */
1763int ceph_permission(struct inode *inode, int mask) 1784int ceph_permission(struct inode *inode, int mask, unsigned int flags)
1764{ 1785{
1765 int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1786 int err;
1787
1788 if (flags & IPERM_FLAG_RCU)
1789 return -ECHILD;
1790
1791 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
1766 1792
1767 if (!err) 1793 if (!err)
1768 err = generic_permission(inode, mask, NULL); 1794 err = generic_permission(inode, mask, flags, NULL);
1769 return err; 1795 return err;
1770} 1796}
1771 1797
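The fill_inode() comment above documents the new version check: a projected (unstable) version from the MDS is odd, and incoming info is skipped whenever our stable version (ours with the low bit cleared) is at least as new. A tiny sketch of the predicate and the us/them table it encodes:

#include <stdio.h>
#include <stdint.h>

/* skip the update when (ours & ~1) >= theirs, as in fill_inode() above */
static int skip_update(uint64_t ours, uint64_t theirs)
{
	return theirs > 0 && (ours & ~1ULL) >= theirs;
}

int main(void)
{
	printf("us=2 them=2 -> %s\n", skip_update(2, 2) ? "skip" : "update");
	printf("us=3 them=2 -> %s\n", skip_update(3, 2) ? "skip" : "update");
	printf("us=3 them=3 -> %s\n", skip_update(3, 3) ? "skip" : "update");
	return 0;
}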
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index a6ce54e94eb5..52e8fd74d450 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
4#include <linux/ioctl.h> 4#include <linux/ioctl.h>
5#include <linux/types.h> 5#include <linux/types.h>
6 6
7#define CEPH_IOCTL_MAGIC 0x98 7#define CEPH_IOCTL_MAGIC 0x97
8 8
9/* just use u64 to align sanely on all archs */ 9/* just use u64 to align sanely on all archs */
10struct ceph_ioctl_layout { 10struct ceph_ioctl_layout {
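The CEPH_IOCTL_MAGIC change above matters because the magic byte is baked into every ioctl command number, so 0x97 and 0x98 produce different values and old and new binaries will not agree. A small user-space illustration; the command index and argument struct here are hypothetical, not the real ceph ioctl definitions.

#include <stdio.h>
#include <sys/ioctl.h>

struct ceph_ioctl_layout_demo { unsigned long long args[6]; }; /* hypothetical */

int main(void)
{
	unsigned long old_cmd = _IOW(0x98, 1, struct ceph_ioctl_layout_demo);
	unsigned long new_cmd = _IOW(0x97, 1, struct ceph_ioctl_layout_demo);

	printf("magic 0x98 -> cmd 0x%lx\nmagic 0x97 -> cmd 0x%lx\n",
	       old_cmd, new_cmd);
	return 0;
}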
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 40abde93c345..476b329867d4 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -11,40 +11,68 @@
11 * Implement fcntl and flock locking functions. 11 * Implement fcntl and flock locking functions.
12 */ 12 */
13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, 13static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
14 u64 pid, u64 pid_ns, 14 int cmd, u8 wait, struct file_lock *fl)
15 int cmd, u64 start, u64 length, u8 wait)
16{ 15{
17 struct inode *inode = file->f_dentry->d_inode; 16 struct inode *inode = file->f_dentry->d_inode;
18 struct ceph_mds_client *mdsc = 17 struct ceph_mds_client *mdsc =
19 ceph_sb_to_client(inode->i_sb)->mdsc; 18 ceph_sb_to_client(inode->i_sb)->mdsc;
20 struct ceph_mds_request *req; 19 struct ceph_mds_request *req;
21 int err; 20 int err;
21 u64 length = 0;
22 22
23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); 23 req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
24 if (IS_ERR(req)) 24 if (IS_ERR(req))
25 return PTR_ERR(req); 25 return PTR_ERR(req);
26 req->r_inode = igrab(inode); 26 req->r_inode = igrab(inode);
27 27
28 /* mds requires start and length rather than start and end */
29 if (LLONG_MAX == fl->fl_end)
30 length = 0;
31 else
32 length = fl->fl_end - fl->fl_start + 1;
33
28 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 34 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
29 "length: %llu, wait: %d, type`: %d", (int)lock_type, 35 "length: %llu, wait: %d, type`: %d", (int)lock_type,
30 (int)operation, pid, start, length, wait, cmd); 36 (int)operation, (u64)fl->fl_pid, fl->fl_start,
37 length, wait, fl->fl_type);
38
31 39
32 req->r_args.filelock_change.rule = lock_type; 40 req->r_args.filelock_change.rule = lock_type;
33 req->r_args.filelock_change.type = cmd; 41 req->r_args.filelock_change.type = cmd;
34 req->r_args.filelock_change.pid = cpu_to_le64(pid); 42 req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
35 /* This should be adjusted, but I'm not sure if 43 /* This should be adjusted, but I'm not sure if
36 namespaces actually get id numbers*/ 44 namespaces actually get id numbers*/
37 req->r_args.filelock_change.pid_namespace = 45 req->r_args.filelock_change.pid_namespace =
38 cpu_to_le64((u64)pid_ns); 46 cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
39 req->r_args.filelock_change.start = cpu_to_le64(start); 47 req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
40 req->r_args.filelock_change.length = cpu_to_le64(length); 48 req->r_args.filelock_change.length = cpu_to_le64(length);
41 req->r_args.filelock_change.wait = wait; 49 req->r_args.filelock_change.wait = wait;
42 50
43 err = ceph_mdsc_do_request(mdsc, inode, req); 51 err = ceph_mdsc_do_request(mdsc, inode, req);
52
53 if ( operation == CEPH_MDS_OP_GETFILELOCK){
54 fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
55 if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
56 fl->fl_type = F_RDLCK;
57 else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
58 fl->fl_type = F_WRLCK;
59 else
60 fl->fl_type = F_UNLCK;
61
62 fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
63 length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
64 le64_to_cpu(req->r_reply_info.filelock_reply->length);
65 if (length >= 1)
66 fl->fl_end = length -1;
67 else
68 fl->fl_end = 0;
69
70 }
44 ceph_mdsc_put_request(req); 71 ceph_mdsc_put_request(req);
45 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " 72 dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
46 "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type, 73 "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
47 (int)operation, pid, start, length, wait, cmd, err); 74 (int)operation, (u64)fl->fl_pid, fl->fl_start,
75 length, wait, fl->fl_type, err);
48 return err; 76 return err;
49} 77}
50 78
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
54 */ 82 */
55int ceph_lock(struct file *file, int cmd, struct file_lock *fl) 83int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
56{ 84{
57 u64 length;
58 u8 lock_cmd; 85 u8 lock_cmd;
59 int err; 86 int err;
60 u8 wait = 0; 87 u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
76 else 103 else
77 lock_cmd = CEPH_LOCK_UNLOCK; 104 lock_cmd = CEPH_LOCK_UNLOCK;
78 105
79 if (LLONG_MAX == fl->fl_end) 106 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
80 length = 0;
81 else
82 length = fl->fl_end - fl->fl_start + 1;
83
84 err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
85 (u64)fl->fl_pid,
86 (u64)(unsigned long)fl->fl_nspid,
87 lock_cmd, fl->fl_start,
88 length, wait);
89 if (!err) { 107 if (!err) {
90 dout("mds locked, locking locally"); 108 if ( op != CEPH_MDS_OP_GETFILELOCK ){
91 err = posix_lock_file(file, fl, NULL); 109 dout("mds locked, locking locally");
92 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { 110 err = posix_lock_file(file, fl, NULL);
93 /* undo! This should only happen if the kernel detects 111 if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
94 * local deadlock. */ 112 /* undo! This should only happen if the kernel detects
95 ceph_lock_message(CEPH_LOCK_FCNTL, op, file, 113 * local deadlock. */
96 (u64)fl->fl_pid, 114 ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
97 (u64)(unsigned long)fl->fl_nspid, 115 CEPH_LOCK_UNLOCK, 0, fl);
98 CEPH_LOCK_UNLOCK, fl->fl_start, 116 dout("got %d on posix_lock_file, undid lock", err);
99 length, 0); 117 }
100 dout("got %d on posix_lock_file, undid lock", err);
101 } 118 }
119
102 } else { 120 } else {
103 dout("mds returned error code %d", err); 121 dout("mds returned error code %d", err);
104 } 122 }
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
107 125
108int ceph_flock(struct file *file, int cmd, struct file_lock *fl) 126int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
109{ 127{
110 u64 length;
111 u8 lock_cmd; 128 u8 lock_cmd;
112 int err; 129 int err;
113 u8 wait = 1; 130 u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
127 lock_cmd = CEPH_LOCK_EXCL; 144 lock_cmd = CEPH_LOCK_EXCL;
128 else 145 else
129 lock_cmd = CEPH_LOCK_UNLOCK; 146 lock_cmd = CEPH_LOCK_UNLOCK;
130 /* mds requires start and length rather than start and end */
131 if (LLONG_MAX == fl->fl_end)
132 length = 0;
133 else
134 length = fl->fl_end - fl->fl_start + 1;
135 147
136 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK, 148 err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
137 file, (u64)fl->fl_pid, 149 file, lock_cmd, wait, fl);
138 (u64)(unsigned long)fl->fl_nspid,
139 lock_cmd, fl->fl_start,
140 length, wait);
141 if (!err) { 150 if (!err) {
142 err = flock_lock_file_wait(file, fl); 151 err = flock_lock_file_wait(file, fl);
143 if (err) { 152 if (err) {
144 ceph_lock_message(CEPH_LOCK_FLOCK, 153 ceph_lock_message(CEPH_LOCK_FLOCK,
145 CEPH_MDS_OP_SETFILELOCK, 154 CEPH_MDS_OP_SETFILELOCK,
146 file, (u64)fl->fl_pid, 155 file, CEPH_LOCK_UNLOCK, 0, fl);
147 (u64)(unsigned long)fl->fl_nspid,
148 CEPH_LOCK_UNLOCK, fl->fl_start,
149 length, 0);
150 dout("got %d on flock_lock_file_wait, undid lock", err); 156 dout("got %d on flock_lock_file_wait, undid lock", err);
151 } 157 }
152 } else { 158 } else {
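The locks.c refactor above moves the "mds requires start and length rather than start and end" conversion into ceph_lock_message(): a struct file_lock carries (fl_start, fl_end) with fl_end == LLONG_MAX for an unbounded lock, while the wire format uses (start, length) with length 0 meaning "to the end of the file". A minimal user-space sketch of the forward conversion, assuming nothing beyond what the hunk shows:

#include <stdio.h>
#include <limits.h>

/* (start, end) -> length, as done before building the MDS request;
 * the GETFILELOCK reply path above performs the inverse,
 * end = start + length - 1, when length is at least 1. */
static unsigned long long range_to_length(long long start, long long end)
{
	if (end == LLONG_MAX)		/* unbounded lock */
		return 0;
	return (unsigned long long)(end - start + 1);
}

int main(void)
{
	printf("lock [100, 199] -> length %llu\n", range_to_length(100, 199));
	printf("lock [100, EOF] -> length %llu\n",
	       range_to_length(100, LLONG_MAX));
	return 0;
}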
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3142b15940c2..a50fca1e03be 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -6,7 +6,6 @@
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/debugfs.h> 7#include <linux/debugfs.h>
8#include <linux/seq_file.h> 8#include <linux/seq_file.h>
9#include <linux/smp_lock.h>
10 9
11#include "super.h" 10#include "super.h"
12#include "mds_client.h" 11#include "mds_client.h"
@@ -203,6 +202,38 @@ out_bad:
203} 202}
204 203
205/* 204/*
205 * parse fcntl F_GETLK results
206 */
207static int parse_reply_info_filelock(void **p, void *end,
208 struct ceph_mds_reply_info_parsed *info)
209{
210 if (*p + sizeof(*info->filelock_reply) > end)
211 goto bad;
212
213 info->filelock_reply = *p;
214 *p += sizeof(*info->filelock_reply);
215
216 if (unlikely(*p != end))
217 goto bad;
218 return 0;
219
220bad:
221 return -EIO;
222}
223
224/*
225 * parse extra results
226 */
227static int parse_reply_info_extra(void **p, void *end,
228 struct ceph_mds_reply_info_parsed *info)
229{
230 if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
231 return parse_reply_info_filelock(p, end, info);
232 else
233 return parse_reply_info_dir(p, end, info);
234}
235
236/*
206 * parse entire mds reply 237 * parse entire mds reply
207 */ 238 */
208static int parse_reply_info(struct ceph_msg *msg, 239static int parse_reply_info(struct ceph_msg *msg,
@@ -224,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
224 goto out_bad; 255 goto out_bad;
225 } 256 }
226 257
227 /* dir content */ 258 /* extra */
228 ceph_decode_32_safe(&p, end, len, bad); 259 ceph_decode_32_safe(&p, end, len, bad);
229 if (len > 0) { 260 if (len > 0) {
230 err = parse_reply_info_dir(&p, p+len, info); 261 err = parse_reply_info_extra(&p, p+len, info);
231 if (err < 0) 262 if (err < 0)
232 goto out_bad; 263 goto out_bad;
233 } 264 }
@@ -529,6 +560,9 @@ static void __register_request(struct ceph_mds_client *mdsc,
529 ceph_mdsc_get_request(req); 560 ceph_mdsc_get_request(req);
530 __insert_request(mdsc, req); 561 __insert_request(mdsc, req);
531 562
563 req->r_uid = current_fsuid();
564 req->r_gid = current_fsgid();
565
532 if (dir) { 566 if (dir) {
533 struct ceph_inode_info *ci = ceph_inode(dir); 567 struct ceph_inode_info *ci = ceph_inode(dir);
534 568
@@ -1452,7 +1486,7 @@ retry:
1452 *base = ceph_ino(temp->d_inode); 1486 *base = ceph_ino(temp->d_inode);
1453 *plen = len; 1487 *plen = len;
1454 dout("build_path on %p %d built %llx '%.*s'\n", 1488 dout("build_path on %p %d built %llx '%.*s'\n",
1455 dentry, atomic_read(&dentry->d_count), *base, len, path); 1489 dentry, dentry->d_count, *base, len, path);
1456 return path; 1490 return path;
1457} 1491}
1458 1492
@@ -1588,8 +1622,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
1588 1622
1589 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 1623 head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
1590 head->op = cpu_to_le32(req->r_op); 1624 head->op = cpu_to_le32(req->r_op);
1591 head->caller_uid = cpu_to_le32(current_fsuid()); 1625 head->caller_uid = cpu_to_le32(req->r_uid);
1592 head->caller_gid = cpu_to_le32(current_fsgid()); 1626 head->caller_gid = cpu_to_le32(req->r_gid);
1593 head->args = req->r_args; 1627 head->args = req->r_args;
1594 1628
1595 ceph_encode_filepath(&p, end, ino1, path1); 1629 ceph_encode_filepath(&p, end, ino1, path1);
@@ -2072,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2072 2106
2073 mutex_lock(&session->s_mutex); 2107 mutex_lock(&session->s_mutex);
2074 if (err < 0) { 2108 if (err < 0) {
2075 pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds); 2109 pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
2076 ceph_msg_dump(msg); 2110 ceph_msg_dump(msg);
2077 goto out_err; 2111 goto out_err;
2078 } 2112 }
@@ -2092,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
2092 mutex_lock(&req->r_fill_mutex); 2126 mutex_lock(&req->r_fill_mutex);
2093 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); 2127 err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
2094 if (err == 0) { 2128 if (err == 0) {
2095 if (result == 0 && rinfo->dir_nr) 2129 if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
2130 rinfo->dir_nr)
2096 ceph_readdir_prepopulate(req, req->r_session); 2131 ceph_readdir_prepopulate(req, req->r_session);
2097 ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 2132 ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
2098 } 2133 }
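parse_reply_info_filelock() above only accepts the F_GETLK payload if a fixed-size record fits exactly in the remaining [p, end) window, returning -EIO otherwise. A user-space sketch of that bounds-checked decode pattern; the record layout is illustrative, not the ceph wire format.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct filelock_rec {		/* hypothetical fixed-size record */
	uint64_t start;
	uint64_t length;
	uint64_t pid;
	uint8_t  type;
} __attribute__((packed));

static int parse_filelock(void **p, void *end, struct filelock_rec *out)
{
	if ((char *)*p + sizeof(*out) > (char *)end)
		return -1;	/* truncated reply: -EIO in the kernel */
	memcpy(out, *p, sizeof(*out));
	*p = (char *)*p + sizeof(*out);
	if (*p != end)
		return -1;	/* trailing bytes are also rejected */
	return 0;
}

int main(void)
{
	unsigned char buf[sizeof(struct filelock_rec)] = { 0 };
	void *p = buf;
	struct filelock_rec rec;

	printf("full buffer:  %d\n", parse_filelock(&p, buf + sizeof(buf), &rec));
	p = buf;
	printf("short buffer: %d\n", parse_filelock(&p, buf + 4, &rec));
	return 0;
}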
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d66d63c72355..aabe563b54db 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in {
42}; 42};
43 43
44/* 44/*
45 * parsed info about an mds reply, including information about the 45 * parsed info about an mds reply, including information about
46 * target inode and/or its parent directory and dentry, and directory 46 * either: 1) the target inode and/or its parent directory and dentry,
47 * contents (for readdir results). 47 * and directory contents (for readdir results), or
48 * 2) the file range lock info (for fcntl F_GETLK results).
48 */ 49 */
49struct ceph_mds_reply_info_parsed { 50struct ceph_mds_reply_info_parsed {
50 struct ceph_mds_reply_head *head; 51 struct ceph_mds_reply_head *head;
51 52
53 /* trace */
52 struct ceph_mds_reply_info_in diri, targeti; 54 struct ceph_mds_reply_info_in diri, targeti;
53 struct ceph_mds_reply_dirfrag *dirfrag; 55 struct ceph_mds_reply_dirfrag *dirfrag;
54 char *dname; 56 char *dname;
55 u32 dname_len; 57 u32 dname_len;
56 struct ceph_mds_reply_lease *dlease; 58 struct ceph_mds_reply_lease *dlease;
57 59
58 struct ceph_mds_reply_dirfrag *dir_dir; 60 /* extra */
59 int dir_nr; 61 union {
60 char **dir_dname; 62 /* for fcntl F_GETLK results */
61 u32 *dir_dname_len; 63 struct ceph_filelock *filelock_reply;
62 struct ceph_mds_reply_lease **dir_dlease; 64
63 struct ceph_mds_reply_info_in *dir_in; 65 /* for readdir results */
64 u8 dir_complete, dir_end; 66 struct {
67 struct ceph_mds_reply_dirfrag *dir_dir;
68 int dir_nr;
69 char **dir_dname;
70 u32 *dir_dname_len;
71 struct ceph_mds_reply_lease **dir_dlease;
72 struct ceph_mds_reply_info_in *dir_in;
73 u8 dir_complete, dir_end;
74 };
75 };
65 76
66 /* encoded blob describing snapshot contexts for certain 77 /* encoded blob describing snapshot contexts for certain
67 operations (e.g., open) */ 78 operations (e.g., open) */
@@ -170,6 +181,8 @@ struct ceph_mds_request {
170 181
171 union ceph_mds_request_args r_args; 182 union ceph_mds_request_args r_args;
172 int r_fmode; /* file mode, if expecting cap */ 183 int r_fmode; /* file mode, if expecting cap */
184 uid_t r_uid;
185 gid_t r_gid;
173 186
174 /* for choosing which mds to send this request to */ 187 /* for choosing which mds to send this request to */
175 int r_direct_mode; 188 int r_direct_mode;
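The mds_client.h change above puts the readdir fields and the F_GETLK reply pointer into an anonymous union, since an MDS reply carries one or the other, never both. A user-space sketch of the layout pattern; the types are simplified stand-ins for the ceph structures.

#include <stdio.h>

struct reply_info {
	int op;				/* tells which union member is valid */
	union {
		void *filelock_reply;	/* fcntl F_GETLK result */
		struct {		/* readdir result */
			int   dir_nr;
			char **dir_dname;
		};
	};
};

int main(void)
{
	struct reply_info ri = { .op = 1 };

	ri.dir_nr = 3;			/* readdir view ...                  */
	printf("dir_nr=%d sizeof=%zu\n", ri.dir_nr, sizeof(ri));
	ri.filelock_reply = NULL;	/* ... or filelock view, never both  */
	return 0;
}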
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1886294e12f7..4553d8829edb 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,9 +293,7 @@ struct ceph_inode_info {
293 int i_rd_ref, i_rdcache_ref, i_wr_ref; 293 int i_rd_ref, i_rdcache_ref, i_wr_ref;
294 int i_wrbuffer_ref, i_wrbuffer_ref_head; 294 int i_wrbuffer_ref, i_wrbuffer_ref_head;
295 u32 i_shared_gen; /* increment each time we get FILE_SHARED */ 295 u32 i_shared_gen; /* increment each time we get FILE_SHARED */
296 u32 i_rdcache_gen; /* we increment this each time we get 296 u32 i_rdcache_gen; /* incremented each time we get FILE_CACHE. */
297 FILE_CACHE. If it's non-zero, we
298 _may_ have cached pages. */
299 u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */ 297 u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
300 298
301 struct list_head i_unsafe_writes; /* uncommitted sync writes */ 299 struct list_head i_unsafe_writes; /* uncommitted sync writes */
@@ -667,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
667extern void ceph_queue_writeback(struct inode *inode); 665extern void ceph_queue_writeback(struct inode *inode);
668 666
669extern int ceph_do_getattr(struct inode *inode, int mask); 667extern int ceph_do_getattr(struct inode *inode, int mask);
670extern int ceph_permission(struct inode *inode, int mask); 668extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
671extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 669extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
672extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 670extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
673 struct kstat *stat); 671 struct kstat *stat);
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 0ed213970ced..ee45648b0d1a 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -4,6 +4,7 @@ config CIFS
4 select NLS 4 select NLS
5 select CRYPTO 5 select CRYPTO
6 select CRYPTO_MD5 6 select CRYPTO_MD5
7 select CRYPTO_HMAC
7 select CRYPTO_ARC4 8 select CRYPTO_ARC4
8 help 9 help
9 This is the client VFS module for the Common Internet File System 10 This is the client VFS module for the Common Internet File System
@@ -143,6 +144,13 @@ config CIFS_FSCACHE
143 to be cached locally on disk through the general filesystem cache 144 to be cached locally on disk through the general filesystem cache
144 manager. If unsure, say N. 145 manager. If unsure, say N.
145 146
147config CIFS_ACL
148 bool "Provide CIFS ACL support (EXPERIMENTAL)"
149 depends on EXPERIMENTAL && CIFS_XATTR
150 help
 151	  Allows fetching a CIFS/NTFS ACL from the server. The DACL blob

152 is handed over to the application/caller.
153
146config CIFS_EXPERIMENTAL 154config CIFS_EXPERIMENTAL
147 bool "CIFS Experimental Features (EXPERIMENTAL)" 155 bool "CIFS Experimental Features (EXPERIMENTAL)"
148 depends on CIFS && EXPERIMENTAL 156 depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index adefa60a9bdc..43b19dd39191 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_CIFS) += cifs.o
6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \ 6cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \ 7 link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \ 8 md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
9 readdir.o ioctl.o sess.o export.o cifsacl.o 9 readdir.o ioctl.o sess.o export.o
10
11cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
10 12
11cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o 13cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
12 14
diff --git a/fs/cifs/README b/fs/cifs/README
index ee68d1036544..46af99ab3614 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -337,6 +337,15 @@ A partial list of the supported mount options follows:
337 wsize default write size (default 57344) 337 wsize default write size (default 57344)
338 maximum wsize currently allowed by CIFS is 57344 (fourteen 338 maximum wsize currently allowed by CIFS is 57344 (fourteen
339 4096 byte pages) 339 4096 byte pages)
340 actimeo=n attribute cache timeout in seconds (default 1 second).
341 After this timeout, the cifs client requests fresh attribute
 342	        information from the server. This option allows tuning the
 343	        attribute cache timeout to suit the workload. Shorter
 344	        timeouts mean better cache coherency but a larger number
 345	        of calls to the server. Longer timeouts mean fewer calls
 346	        to the server at the expense of weaker cache coherency
 347	        checks (i.e. the attribute cache may be stale for a short
 348	        period of time).
340 rw mount the network share read-write (note that the 349 rw mount the network share read-write (note that the
341 server may still consider the share read-only) 350 server may still consider the share read-only)
342 ro mount network share read-only 351 ro mount network share read-only
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 5aff46c61e52..355abcdcda98 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
81 81
82v) mount check for unmatched uids 82v) mount check for unmatched uids
83 83
84w) Add support for new vfs entry points for setlease and fallocate 84w) Add support for new vfs entry point for fallocate
85 85
86x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 86x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of
87processes can proceed better in parallel (on the server) 87processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 525ba59a4105..7852cd677051 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -15,7 +15,7 @@
15 * the GNU Lesser General Public License for more details. 15 * the GNU Lesser General Public License for more details.
16 * 16 *
17 */ 17 */
18#include <linux/radix-tree.h> 18#include <linux/rbtree.h>
19 19
20#ifndef _CIFS_FS_SB_H 20#ifndef _CIFS_FS_SB_H
21#define _CIFS_FS_SB_H 21#define _CIFS_FS_SB_H
@@ -42,12 +42,13 @@
42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ 42#define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */
43 43
44struct cifs_sb_info { 44struct cifs_sb_info {
45 struct radix_tree_root tlink_tree; 45 struct rb_root tlink_tree;
46#define CIFS_TLINK_MASTER_TAG 0 /* is "master" (mount) tcon */
47 spinlock_t tlink_tree_lock; 46 spinlock_t tlink_tree_lock;
47 struct tcon_link *master_tlink;
48 struct nls_table *local_nls; 48 struct nls_table *local_nls;
49 unsigned int rsize; 49 unsigned int rsize;
50 unsigned int wsize; 50 unsigned int wsize;
51 unsigned long actimeo; /* attribute cache timeout (jiffies) */
51 atomic_t active; 52 atomic_t active;
52 uid_t mnt_uid; 53 uid_t mnt_uid;
53 gid_t mnt_gid; 54 gid_t mnt_gid;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c9b4792ae825..a437ec391a01 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -30,8 +30,6 @@
30#include "cifs_debug.h" 30#include "cifs_debug.h"
31 31
32 32
33#ifdef CONFIG_CIFS_EXPERIMENTAL
34
35static struct cifs_wksid wksidarr[NUM_WK_SIDS] = { 33static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
36 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"}, 34 {{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
37 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"}, 35 {{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
@@ -560,7 +558,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
560 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); 558 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
561 559
562 if (IS_ERR(tlink)) 560 if (IS_ERR(tlink))
563 return NULL; 561 return ERR_CAST(tlink);
564 562
565 xid = GetXid(); 563 xid = GetXid();
566 rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen); 564 rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen);
@@ -568,7 +566,9 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
568 566
569 cifs_put_tlink(tlink); 567 cifs_put_tlink(tlink);
570 568
571 cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); 569 cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
570 if (rc)
571 return ERR_PTR(rc);
572 return pntsd; 572 return pntsd;
573} 573}
574 574
@@ -583,7 +583,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
583 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); 583 struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
584 584
585 if (IS_ERR(tlink)) 585 if (IS_ERR(tlink))
586 return NULL; 586 return ERR_CAST(tlink);
587 587
588 tcon = tlink_tcon(tlink); 588 tcon = tlink_tcon(tlink);
589 xid = GetXid(); 589 xid = GetXid();
@@ -591,23 +591,22 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
591 rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0, 591 rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0,
592 &fid, &oplock, NULL, cifs_sb->local_nls, 592 &fid, &oplock, NULL, cifs_sb->local_nls,
593 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 593 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
594 if (rc) { 594 if (!rc) {
595 cERROR(1, "Unable to open file to get ACL"); 595 rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
596 goto out; 596 CIFSSMBClose(xid, tcon, fid);
597 } 597 }
598 598
599 rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
600 cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
601
602 CIFSSMBClose(xid, tcon, fid);
603 out:
604 cifs_put_tlink(tlink); 599 cifs_put_tlink(tlink);
605 FreeXid(xid); 600 FreeXid(xid);
601
602 cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
603 if (rc)
604 return ERR_PTR(rc);
606 return pntsd; 605 return pntsd;
607} 606}
608 607
609/* Retrieve an ACL from the server */ 608/* Retrieve an ACL from the server */
610static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, 609struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
611 struct inode *inode, const char *path, 610 struct inode *inode, const char *path,
612 u32 *pacllen) 611 u32 *pacllen)
613{ 612{
@@ -695,7 +694,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
695} 694}
696 695
697/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ 696/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
698void 697int
699cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr, 698cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
700 struct inode *inode, const char *path, const __u16 *pfid) 699 struct inode *inode, const char *path, const __u16 *pfid)
701{ 700{
@@ -711,17 +710,21 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
711 pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen); 710 pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen);
712 711
713 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ 712 /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
714 if (pntsd) 713 if (IS_ERR(pntsd)) {
714 rc = PTR_ERR(pntsd);
715 cERROR(1, "%s: error %d getting sec desc", __func__, rc);
716 } else {
715 rc = parse_sec_desc(pntsd, acllen, fattr); 717 rc = parse_sec_desc(pntsd, acllen, fattr);
716 if (rc) 718 kfree(pntsd);
717 cFYI(1, "parse sec desc failed rc = %d", rc); 719 if (rc)
720 cERROR(1, "parse sec desc failed rc = %d", rc);
721 }
718 722
719 kfree(pntsd); 723 return rc;
720 return;
721} 724}
722 725
723/* Convert mode bits to an ACL so we can update the ACL on the server */ 726/* Convert mode bits to an ACL so we can update the ACL on the server */
724int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) 727int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode)
725{ 728{
726 int rc = 0; 729 int rc = 0;
727 __u32 secdesclen = 0; 730 __u32 secdesclen = 0;
@@ -736,7 +739,10 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
736 /* Add three ACEs for owner, group, everyone getting rid of 739 /* Add three ACEs for owner, group, everyone getting rid of
737 other ACEs as chmod disables ACEs and set the security descriptor */ 740 other ACEs as chmod disables ACEs and set the security descriptor */
738 741
739 if (pntsd) { 742 if (IS_ERR(pntsd)) {
743 rc = PTR_ERR(pntsd);
744 cERROR(1, "%s: error %d getting sec desc", __func__, rc);
745 } else {
740 /* allocate memory for the smb header, 746 /* allocate memory for the smb header,
741 set security descriptor request security descriptor 747 set security descriptor request security descriptor
742 parameters, and secuirty descriptor itself */ 748 parameters, and secuirty descriptor itself */
@@ -766,4 +772,3 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
766 772
767 return rc; 773 return rc;
768} 774}
769#endif /* CONFIG_CIFS_EXPERIMENTAL */
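The cifsacl.c changes above switch the ACL helpers from returning NULL on failure to the ERR_PTR convention, so callers can recover the actual error with PTR_ERR() after checking IS_ERR(). A user-space sketch of that convention; the macros here are simplified stand-ins for the kernel's err.h, and get_acl() is a hypothetical caller-side example.

#include <stdio.h>
#include <errno.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)   { return (void *)error; }
static inline long  PTR_ERR(const void *p) { return (long)p; }
static inline int   IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

static void *get_acl(int fail)	/* stand-in for get_cifs_acl() */
{
	static char acl_blob[16];

	if (fail)
		return ERR_PTR(-EIO);	/* error encoded in the pointer */
	return acl_blob;
}

int main(void)
{
	void *acl = get_acl(1);

	if (IS_ERR(acl))
		printf("error %ld getting sec desc\n", PTR_ERR(acl));
	else
		printf("got ACL at %p\n", acl);
	return 0;
}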
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 6c8096cf5155..c4ae7d036563 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -74,11 +74,7 @@ struct cifs_wksid {
74 char sidname[SIDNAMELENGTH]; 74 char sidname[SIDNAMELENGTH];
75} __attribute__((packed)); 75} __attribute__((packed));
76 76
77#ifdef CONFIG_CIFS_EXPERIMENTAL
78
79extern int match_sid(struct cifs_sid *); 77extern int match_sid(struct cifs_sid *);
80extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *); 78extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
81 79
82#endif /* CONFIG_CIFS_EXPERIMENTAL */
83
84#endif /* _CIFSACL_H */ 80#endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 75c4eaa79588..8e21e0fe65d5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data,
116 return -ENOMEM; 116 return -ENOMEM;
117 117
118 spin_lock_init(&cifs_sb->tlink_tree_lock); 118 spin_lock_init(&cifs_sb->tlink_tree_lock);
119 INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL); 119 cifs_sb->tlink_tree = RB_ROOT;
120 120
121 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); 121 rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
122 if (rc) { 122 if (rc) {
@@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
283 return 0; 283 return 0;
284} 284}
285 285
286static int cifs_permission(struct inode *inode, int mask) 286static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
287{ 287{
288 struct cifs_sb_info *cifs_sb; 288 struct cifs_sb_info *cifs_sb;
289 289
290 if (flags & IPERM_FLAG_RCU)
291 return -ECHILD;
292
290 cifs_sb = CIFS_SB(inode->i_sb); 293 cifs_sb = CIFS_SB(inode->i_sb);
291 294
292 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { 295 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
@@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask)
298 on the client (above and beyond ACL on servers) for 301 on the client (above and beyond ACL on servers) for
299 servers which do not support setting and viewing mode bits, 302 servers which do not support setting and viewing mode bits,
300 so allowing client to check permissions is useful */ 303 so allowing client to check permissions is useful */
301 return generic_permission(inode, mask, NULL); 304 return generic_permission(inode, mask, flags, NULL);
302} 305}
303 306
304static struct kmem_cache *cifs_inode_cachep; 307static struct kmem_cache *cifs_inode_cachep;
@@ -321,8 +324,7 @@ cifs_alloc_inode(struct super_block *sb)
321 /* Until the file is open and we have gotten oplock 324 /* Until the file is open and we have gotten oplock
322 info back from the server, can not assume caching of 325 info back from the server, can not assume caching of
323 file data or metadata */ 326 file data or metadata */
324 cifs_inode->clientCanCacheRead = false; 327 cifs_set_oplock_level(cifs_inode, 0);
325 cifs_inode->clientCanCacheAll = false;
326 cifs_inode->delete_pending = false; 328 cifs_inode->delete_pending = false;
327 cifs_inode->invalid_mapping = false; 329 cifs_inode->invalid_mapping = false;
328 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 330 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
@@ -335,10 +337,17 @@ cifs_alloc_inode(struct super_block *sb)
335 return &cifs_inode->vfs_inode; 337 return &cifs_inode->vfs_inode;
336} 338}
337 339
340static void cifs_i_callback(struct rcu_head *head)
341{
342 struct inode *inode = container_of(head, struct inode, i_rcu);
343 INIT_LIST_HEAD(&inode->i_dentry);
344 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
345}
346
338static void 347static void
339cifs_destroy_inode(struct inode *inode) 348cifs_destroy_inode(struct inode *inode)
340{ 349{
341 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); 350 call_rcu(&inode->i_rcu, cifs_i_callback);
342} 351}
343 352
344static void 353static void
@@ -459,9 +468,13 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
459 seq_printf(s, ",acl"); 468 seq_printf(s, ",acl");
460 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) 469 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
461 seq_printf(s, ",mfsymlinks"); 470 seq_printf(s, ",mfsymlinks");
471 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
472 seq_printf(s, ",fsc");
462 473
463 seq_printf(s, ",rsize=%d", cifs_sb->rsize); 474 seq_printf(s, ",rsize=%d", cifs_sb->rsize);
464 seq_printf(s, ",wsize=%d", cifs_sb->wsize); 475 seq_printf(s, ",wsize=%d", cifs_sb->wsize);
476 /* convert actimeo and display it in seconds */
477 seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
465 478
466 return 0; 479 return 0;
467} 480}
@@ -934,7 +947,6 @@ init_cifs(void)
934 GlobalCurrentXid = 0; 947 GlobalCurrentXid = 0;
935 GlobalTotalActiveXid = 0; 948 GlobalTotalActiveXid = 0;
936 GlobalMaxActiveXid = 0; 949 GlobalMaxActiveXid = 0;
937 memset(Local_System_Name, 0, 15);
938 spin_lock_init(&cifs_tcp_ses_lock); 950 spin_lock_init(&cifs_tcp_ses_lock);
939 spin_lock_init(&cifs_file_list_lock); 951 spin_lock_init(&cifs_file_list_lock);
940 spin_lock_init(&GlobalMid_Lock); 952 spin_lock_init(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f259e4d7612d..7136c0c3e2f9 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -45,6 +45,16 @@
45#define CIFS_MIN_RCV_POOL 4 45#define CIFS_MIN_RCV_POOL 4
46 46
47/* 47/*
48 * default attribute cache timeout (jiffies)
49 */
50#define CIFS_DEF_ACTIMEO (1 * HZ)
51
52/*
53 * max attribute cache timeout (jiffies) - 2^30
54 */
55#define CIFS_MAX_ACTIMEO (1 << 30)
56
57/*
48 * MAX_REQ is the maximum number of requests that WE will send 58 * MAX_REQ is the maximum number of requests that WE will send
49 * on one socket concurrently. It also matches the most common 59 * on one socket concurrently. It also matches the most common
50 * value of max multiplex returned by servers. We may 60 * value of max multiplex returned by servers. We may
@@ -336,7 +346,8 @@ struct cifsTconInfo {
336 * "get" on the container. 346 * "get" on the container.
337 */ 347 */
338struct tcon_link { 348struct tcon_link {
339 unsigned long tl_index; 349 struct rb_node tl_rbnode;
350 uid_t tl_uid;
340 unsigned long tl_flags; 351 unsigned long tl_flags;
341#define TCON_LINK_MASTER 0 352#define TCON_LINK_MASTER 0
342#define TCON_LINK_PENDING 1 353#define TCON_LINK_PENDING 1
@@ -745,8 +756,6 @@ GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
745GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ 756GLOBAL_EXTERN unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */
746GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */ 757GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */
747 /* on midQ entries */ 758 /* on midQ entries */
748GLOBAL_EXTERN char Local_System_Name[15];
749
750/* 759/*
751 * Global counters, updated atomically 760 * Global counters, updated atomically
752 */ 761 */
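
The new CIFS_DEF_ACTIMEO and CIFS_MAX_ACTIMEO constants above are expressed in jiffies; the actimeo= mount option (parsed in the connect.c hunk further down) is given in seconds and multiplied by HZ, while cifs_show_options() divides by HZ to print seconds again. A small userspace sketch of that round trip, assuming an example HZ of 250 and using strtoul() in place of the kernel's simple_strtoul():

#include <stdio.h>
#include <stdlib.h>

#define HZ                      250UL           /* example value only */
#define CIFS_DEF_ACTIMEO        (1 * HZ)
#define CIFS_MAX_ACTIMEO        (1UL << 30)

int main(void)
{
        const char *value = "5";                /* as in mount -o actimeo=5 */
        unsigned long actimeo = CIFS_DEF_ACTIMEO;

        /* cifs_parse_mount_options(): seconds from the user become jiffies. */
        actimeo = HZ * strtoul(value, NULL, 0);
        if (actimeo > CIFS_MAX_ACTIMEO) {
                fprintf(stderr, "attribute cache timeout too large\n");
                return 1;
        }

        /* cifs_show_options(): convert back to seconds for display. */
        printf(",actimeo=%lu\n", actimeo / HZ);
        return 0;
}
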
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index edb6d90efdf2..e6d1481b16c1 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -54,7 +54,8 @@ do { \
54 __func__, curr_xid, (int)rc); \ 54 __func__, curr_xid, (int)rc); \
55} while (0) 55} while (0)
56extern char *build_path_from_dentry(struct dentry *); 56extern char *build_path_from_dentry(struct dentry *);
57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb); 57extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
58 struct cifsTconInfo *tcon);
58extern char *build_wildcard_path_from_dentry(struct dentry *direntry); 59extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
59extern char *cifs_compose_mount_options(const char *sb_mountdata, 60extern char *cifs_compose_mount_options(const char *sb_mountdata,
60 const char *fullpath, const struct dfs_info3_param *ref, 61 const char *fullpath, const struct dfs_info3_param *ref,
@@ -79,9 +80,7 @@ extern bool is_valid_oplock_break(struct smb_hdr *smb,
79 struct TCP_Server_Info *); 80 struct TCP_Server_Info *);
80extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); 81extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
81extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); 82extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
82#ifdef CONFIG_CIFS_EXPERIMENTAL
83extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); 83extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
84#endif
85extern unsigned int smbCalcSize(struct smb_hdr *ptr); 84extern unsigned int smbCalcSize(struct smb_hdr *ptr);
86extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); 85extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
87extern int decode_negTokenInit(unsigned char *security_blob, int length, 86extern int decode_negTokenInit(unsigned char *security_blob, int length,
@@ -104,6 +103,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
104extern u64 cifs_UnixTimeToNT(struct timespec); 103extern u64 cifs_UnixTimeToNT(struct timespec);
105extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, 104extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
106 int offset); 105 int offset);
106extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
107 107
108extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, 108extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
109 struct file *file, struct tcon_link *tlink, 109 struct file *file, struct tcon_link *tlink,
@@ -129,10 +129,12 @@ extern int cifs_get_file_info_unix(struct file *filp);
129extern int cifs_get_inode_info_unix(struct inode **pinode, 129extern int cifs_get_inode_info_unix(struct inode **pinode,
130 const unsigned char *search_path, 130 const unsigned char *search_path,
131 struct super_block *sb, int xid); 131 struct super_block *sb, int xid);
132extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, 132extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
133 struct cifs_fattr *fattr, struct inode *inode, 133 struct cifs_fattr *fattr, struct inode *inode,
134 const char *path, const __u16 *pfid); 134 const char *path, const __u16 *pfid);
135extern int mode_to_acl(struct inode *inode, const char *path, __u64); 135extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
136extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
137 const char *, u32 *);
136 138
137extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, 139extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
138 const char *); 140 const char *);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2f2632b6df5a..67acfb3acad2 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2478,95 +2478,6 @@ querySymLinkRetry:
2478} 2478}
2479 2479
2480#ifdef CONFIG_CIFS_EXPERIMENTAL 2480#ifdef CONFIG_CIFS_EXPERIMENTAL
2481/* Initialize NT TRANSACT SMB into small smb request buffer.
2482 This assumes that all NT TRANSACTS that we init here have
2483 total parm and data under about 400 bytes (to fit in small cifs
2484 buffer size), which is the case so far, it easily fits. NB:
2485 Setup words themselves and ByteCount
2486 MaxSetupCount (size of returned setup area) and
2487 MaxParameterCount (returned parms size) must be set by caller */
2488static int
2489smb_init_nttransact(const __u16 sub_command, const int setup_count,
2490 const int parm_len, struct cifsTconInfo *tcon,
2491 void **ret_buf)
2492{
2493 int rc;
2494 __u32 temp_offset;
2495 struct smb_com_ntransact_req *pSMB;
2496
2497 rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
2498 (void **)&pSMB);
2499 if (rc)
2500 return rc;
2501 *ret_buf = (void *)pSMB;
2502 pSMB->Reserved = 0;
2503 pSMB->TotalParameterCount = cpu_to_le32(parm_len);
2504 pSMB->TotalDataCount = 0;
2505 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
2506 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
2507 pSMB->ParameterCount = pSMB->TotalParameterCount;
2508 pSMB->DataCount = pSMB->TotalDataCount;
2509 temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
2510 (setup_count * 2) - 4 /* for rfc1001 length itself */;
2511 pSMB->ParameterOffset = cpu_to_le32(temp_offset);
2512 pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
2513 pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
2514 pSMB->SubCommand = cpu_to_le16(sub_command);
2515 return 0;
2516}
2517
2518static int
2519validate_ntransact(char *buf, char **ppparm, char **ppdata,
2520 __u32 *pparmlen, __u32 *pdatalen)
2521{
2522 char *end_of_smb;
2523 __u32 data_count, data_offset, parm_count, parm_offset;
2524 struct smb_com_ntransact_rsp *pSMBr;
2525
2526 *pdatalen = 0;
2527 *pparmlen = 0;
2528
2529 if (buf == NULL)
2530 return -EINVAL;
2531
2532 pSMBr = (struct smb_com_ntransact_rsp *)buf;
2533
2534 /* ByteCount was converted from little endian in SendReceive */
2535 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
2536 (char *)&pSMBr->ByteCount;
2537
2538 data_offset = le32_to_cpu(pSMBr->DataOffset);
2539 data_count = le32_to_cpu(pSMBr->DataCount);
2540 parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
2541 parm_count = le32_to_cpu(pSMBr->ParameterCount);
2542
2543 *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
2544 *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
2545
2546 /* should we also check that parm and data areas do not overlap? */
2547 if (*ppparm > end_of_smb) {
2548 cFYI(1, "parms start after end of smb");
2549 return -EINVAL;
2550 } else if (parm_count + *ppparm > end_of_smb) {
2551 cFYI(1, "parm end after end of smb");
2552 return -EINVAL;
2553 } else if (*ppdata > end_of_smb) {
2554 cFYI(1, "data starts after end of smb");
2555 return -EINVAL;
2556 } else if (data_count + *ppdata > end_of_smb) {
2557 cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
2558 *ppdata, data_count, (data_count + *ppdata),
2559 end_of_smb, pSMBr);
2560 return -EINVAL;
2561 } else if (parm_count + data_count > pSMBr->ByteCount) {
2562 cFYI(1, "parm count and data count larger than SMB");
2563 return -EINVAL;
2564 }
2565 *pdatalen = data_count;
2566 *pparmlen = parm_count;
2567 return 0;
2568}
2569
2570int 2481int
2571CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon, 2482CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
2572 const unsigned char *searchName, 2483 const unsigned char *searchName,
@@ -3056,7 +2967,97 @@ GetExtAttrOut:
3056 2967
3057#endif /* CONFIG_POSIX */ 2968#endif /* CONFIG_POSIX */
3058 2969
3059#ifdef CONFIG_CIFS_EXPERIMENTAL 2970#ifdef CONFIG_CIFS_ACL
2971/*
2972 * Initialize NT TRANSACT SMB into small smb request buffer. This assumes that
2973 * all NT TRANSACTS that we init here have total parm and data under about 400
2974 * bytes (to fit in small cifs buffer size), which is the case so far, it
2975 * easily fits. NB: Setup words themselves and ByteCount MaxSetupCount (size of
2976 * returned setup area) and MaxParameterCount (returned parms size) must be set
2977 * by caller
2978 */
2979static int
2980smb_init_nttransact(const __u16 sub_command, const int setup_count,
2981 const int parm_len, struct cifsTconInfo *tcon,
2982 void **ret_buf)
2983{
2984 int rc;
2985 __u32 temp_offset;
2986 struct smb_com_ntransact_req *pSMB;
2987
2988 rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
2989 (void **)&pSMB);
2990 if (rc)
2991 return rc;
2992 *ret_buf = (void *)pSMB;
2993 pSMB->Reserved = 0;
2994 pSMB->TotalParameterCount = cpu_to_le32(parm_len);
2995 pSMB->TotalDataCount = 0;
2996 pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
2997 MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
2998 pSMB->ParameterCount = pSMB->TotalParameterCount;
2999 pSMB->DataCount = pSMB->TotalDataCount;
3000 temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
3001 (setup_count * 2) - 4 /* for rfc1001 length itself */;
3002 pSMB->ParameterOffset = cpu_to_le32(temp_offset);
3003 pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
3004 pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
3005 pSMB->SubCommand = cpu_to_le16(sub_command);
3006 return 0;
3007}
3008
3009static int
3010validate_ntransact(char *buf, char **ppparm, char **ppdata,
3011 __u32 *pparmlen, __u32 *pdatalen)
3012{
3013 char *end_of_smb;
3014 __u32 data_count, data_offset, parm_count, parm_offset;
3015 struct smb_com_ntransact_rsp *pSMBr;
3016
3017 *pdatalen = 0;
3018 *pparmlen = 0;
3019
3020 if (buf == NULL)
3021 return -EINVAL;
3022
3023 pSMBr = (struct smb_com_ntransact_rsp *)buf;
3024
3025 /* ByteCount was converted from little endian in SendReceive */
3026 end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
3027 (char *)&pSMBr->ByteCount;
3028
3029 data_offset = le32_to_cpu(pSMBr->DataOffset);
3030 data_count = le32_to_cpu(pSMBr->DataCount);
3031 parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
3032 parm_count = le32_to_cpu(pSMBr->ParameterCount);
3033
3034 *ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
3035 *ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
3036
3037 /* should we also check that parm and data areas do not overlap? */
3038 if (*ppparm > end_of_smb) {
3039 cFYI(1, "parms start after end of smb");
3040 return -EINVAL;
3041 } else if (parm_count + *ppparm > end_of_smb) {
3042 cFYI(1, "parm end after end of smb");
3043 return -EINVAL;
3044 } else if (*ppdata > end_of_smb) {
3045 cFYI(1, "data starts after end of smb");
3046 return -EINVAL;
3047 } else if (data_count + *ppdata > end_of_smb) {
3048 cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
3049 *ppdata, data_count, (data_count + *ppdata),
3050 end_of_smb, pSMBr);
3051 return -EINVAL;
3052 } else if (parm_count + data_count > pSMBr->ByteCount) {
3053 cFYI(1, "parm count and data count larger than SMB");
3054 return -EINVAL;
3055 }
3056 *pdatalen = data_count;
3057 *pparmlen = parm_count;
3058 return 0;
3059}
3060
3060/* Get Security Descriptor (by handle) from remote server for a file or dir */ 3061/* Get Security Descriptor (by handle) from remote server for a file or dir */
3061int 3062int
3062CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid, 3063CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
@@ -3214,7 +3215,7 @@ setCifsAclRetry:
3214 return (rc); 3215 return (rc);
3215} 3216}
3216 3217
3217#endif /* CONFIG_CIFS_EXPERIMENTAL */ 3218#endif /* CONFIG_CIFS_ACL */
3218 3219
3219/* Legacy Query Path Information call for lookup to old servers such 3220/* Legacy Query Path Information call for lookup to old servers such
3220 as Win9x/WinME */ 3221 as Win9x/WinME */
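
validate_ntransact(), kept verbatim in the relocated CONFIG_CIFS_ACL block above, is essentially a set of bounds checks: the parameter and data regions advertised in the NT TRANSACT response must fall entirely inside the received SMB before either is dereferenced. A simplified userspace analogue of those checks, with an invented demo_rsp layout standing in for smb_com_ntransact_rsp:

#include <stdio.h>
#include <stdint.h>

/* Invented response layout; only the bookkeeping fields matter here. */
struct demo_rsp {
        uint32_t parm_offset, parm_count;
        uint32_t data_offset, data_count;
        uint32_t byte_count;
};

/* Return 0 if both regions fit inside the byte_count-sized payload. */
static int demo_validate(const struct demo_rsp *rsp)
{
        if (rsp->parm_offset > rsp->byte_count ||
            rsp->parm_count > rsp->byte_count - rsp->parm_offset)
                return -1;      /* parms start or end past the buffer */
        if (rsp->data_offset > rsp->byte_count ||
            rsp->data_count > rsp->byte_count - rsp->data_offset)
                return -1;      /* data starts or ends past the buffer */
        if ((uint64_t)rsp->parm_count + rsp->data_count > rsp->byte_count)
                return -1;      /* combined sizes larger than the SMB */
        return 0;
}

int main(void)
{
        struct demo_rsp rsp = {
                .parm_offset = 0,  .parm_count = 16,
                .data_offset = 16, .data_count = 32,
                .byte_count  = 64,
        };

        printf("valid: %s\n", demo_validate(&rsp) == 0 ? "yes" : "no");
        return 0;
}
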
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9eb327defa1d..cc1a8604a790 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -105,6 +105,7 @@ struct smb_vol {
105 unsigned int wsize; 105 unsigned int wsize;
106 bool sockopt_tcp_nodelay:1; 106 bool sockopt_tcp_nodelay:1;
107 unsigned short int port; 107 unsigned short int port;
108 unsigned long actimeo; /* attribute cache timeout (jiffies) */
108 char *prepath; 109 char *prepath;
109 struct sockaddr_storage srcaddr; /* allow binding to a local IP */ 110 struct sockaddr_storage srcaddr; /* allow binding to a local IP */
110 struct nls_table *local_nls; 111 struct nls_table *local_nls;
@@ -116,6 +117,7 @@ struct smb_vol {
116 117
117static int ipv4_connect(struct TCP_Server_Info *server); 118static int ipv4_connect(struct TCP_Server_Info *server);
118static int ipv6_connect(struct TCP_Server_Info *server); 119static int ipv6_connect(struct TCP_Server_Info *server);
120static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
119static void cifs_prune_tlinks(struct work_struct *work); 121static void cifs_prune_tlinks(struct work_struct *work);
120 122
121/* 123/*
@@ -805,23 +807,20 @@ cifs_parse_mount_options(char *options, const char *devname,
805 short int override_gid = -1; 807 short int override_gid = -1;
806 bool uid_specified = false; 808 bool uid_specified = false;
807 bool gid_specified = false; 809 bool gid_specified = false;
810 char *nodename = utsname()->nodename;
808 811
809 separator[0] = ','; 812 separator[0] = ',';
810 separator[1] = 0; 813 separator[1] = 0;
811 814
812 if (Local_System_Name[0] != 0) 815 /*
813 memcpy(vol->source_rfc1001_name, Local_System_Name, 15); 816 * does not have to be perfect mapping since field is
814 else { 817 * informational, only used for servers that do not support
815 char *nodename = utsname()->nodename; 818 * port 445 and it can be overridden at mount time
816 int n = strnlen(nodename, 15); 819 */
817 memset(vol->source_rfc1001_name, 0x20, 15); 820 memset(vol->source_rfc1001_name, 0x20, 15);
818 for (i = 0; i < n; i++) { 821 for (i = 0; i < strnlen(nodename, 15); i++)
819 /* does not have to be perfect mapping since field is 822 vol->source_rfc1001_name[i] = toupper(nodename[i]);
820 informational, only used for servers that do not support 823
821 port 445 and it can be overridden at mount time */
822 vol->source_rfc1001_name[i] = toupper(nodename[i]);
823 }
824 }
825 vol->source_rfc1001_name[15] = 0; 824 vol->source_rfc1001_name[15] = 0;
826 /* null target name indicates to use *SMBSERVR default called name 825 /* null target name indicates to use *SMBSERVR default called name
827 if we end up sending RFC1001 session initialize */ 826 if we end up sending RFC1001 session initialize */
@@ -839,6 +838,8 @@ cifs_parse_mount_options(char *options, const char *devname,
839 /* default to using server inode numbers where available */ 838 /* default to using server inode numbers where available */
840 vol->server_ino = 1; 839 vol->server_ino = 1;
841 840
841 vol->actimeo = CIFS_DEF_ACTIMEO;
842
842 if (!options) 843 if (!options)
843 return 1; 844 return 1;
844 845
@@ -1213,6 +1214,16 @@ cifs_parse_mount_options(char *options, const char *devname,
1213 printk(KERN_WARNING "CIFS: server net" 1214 printk(KERN_WARNING "CIFS: server net"
1214 "biosname longer than 15 truncated.\n"); 1215 "biosname longer than 15 truncated.\n");
1215 } 1216 }
1217 } else if (strnicmp(data, "actimeo", 7) == 0) {
1218 if (value && *value) {
1219 vol->actimeo = HZ * simple_strtoul(value,
1220 &value, 0);
1221 if (vol->actimeo > CIFS_MAX_ACTIMEO) {
1222 cERROR(1, "CIFS: attribute cache"
1223 "timeout too large");
1224 return 1;
1225 }
1226 }
1216 } else if (strnicmp(data, "credentials", 4) == 0) { 1227 } else if (strnicmp(data, "credentials", 4) == 0) {
1217 /* ignore */ 1228 /* ignore */
1218 } else if (strnicmp(data, "version", 3) == 0) { 1229 } else if (strnicmp(data, "version", 3) == 0) {
@@ -1351,6 +1362,11 @@ cifs_parse_mount_options(char *options, const char *devname,
1351 "supported. Instead set " 1362 "supported. Instead set "
1352 "/proc/fs/cifs/LookupCacheEnabled to 0\n"); 1363 "/proc/fs/cifs/LookupCacheEnabled to 0\n");
1353 } else if (strnicmp(data, "fsc", 3) == 0) { 1364 } else if (strnicmp(data, "fsc", 3) == 0) {
1365#ifndef CONFIG_CIFS_FSCACHE
1366 cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
1367 "kernel config option set");
1368 return 1;
1369#endif
1354 vol->fsc = true; 1370 vol->fsc = true;
1355 } else if (strnicmp(data, "mfsymlinks", 10) == 0) { 1371 } else if (strnicmp(data, "mfsymlinks", 10) == 0) {
1356 vol->mfsymlinks = true; 1372 vol->mfsymlinks = true;
@@ -2565,6 +2581,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
2565 cFYI(1, "file mode: 0x%x dir mode: 0x%x", 2581 cFYI(1, "file mode: 0x%x dir mode: 0x%x",
2566 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); 2582 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2567 2583
2584 cifs_sb->actimeo = pvolume_info->actimeo;
2585
2568 if (pvolume_info->noperm) 2586 if (pvolume_info->noperm)
2569 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM; 2587 cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
2570 if (pvolume_info->setuids) 2588 if (pvolume_info->setuids)
@@ -2815,13 +2833,13 @@ remote_path_check:
2815 /* check if a whole path (including prepath) is not remote */ 2833 /* check if a whole path (including prepath) is not remote */
2816 if (!rc && cifs_sb->prepathlen && tcon) { 2834 if (!rc && cifs_sb->prepathlen && tcon) {
2817 /* build_path_to_root works only when we have a valid tcon */ 2835 /* build_path_to_root works only when we have a valid tcon */
2818 full_path = cifs_build_path_to_root(cifs_sb); 2836 full_path = cifs_build_path_to_root(cifs_sb, tcon);
2819 if (full_path == NULL) { 2837 if (full_path == NULL) {
2820 rc = -ENOMEM; 2838 rc = -ENOMEM;
2821 goto mount_fail_check; 2839 goto mount_fail_check;
2822 } 2840 }
2823 rc = is_path_accessible(xid, tcon, cifs_sb, full_path); 2841 rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
2824 if (rc != -EREMOTE) { 2842 if (rc != 0 && rc != -EREMOTE) {
2825 kfree(full_path); 2843 kfree(full_path);
2826 goto mount_fail_check; 2844 goto mount_fail_check;
2827 } 2845 }
@@ -2900,24 +2918,16 @@ remote_path_check:
2900 goto mount_fail_check; 2918 goto mount_fail_check;
2901 } 2919 }
2902 2920
2903 tlink->tl_index = pSesInfo->linux_uid; 2921 tlink->tl_uid = pSesInfo->linux_uid;
2904 tlink->tl_tcon = tcon; 2922 tlink->tl_tcon = tcon;
2905 tlink->tl_time = jiffies; 2923 tlink->tl_time = jiffies;
2906 set_bit(TCON_LINK_MASTER, &tlink->tl_flags); 2924 set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
2907 set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); 2925 set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
2908 2926
2909 rc = radix_tree_preload(GFP_KERNEL); 2927 cifs_sb->master_tlink = tlink;
2910 if (rc == -ENOMEM) {
2911 kfree(tlink);
2912 goto mount_fail_check;
2913 }
2914
2915 spin_lock(&cifs_sb->tlink_tree_lock); 2928 spin_lock(&cifs_sb->tlink_tree_lock);
2916 radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink); 2929 tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
2917 radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid,
2918 CIFS_TLINK_MASTER_TAG);
2919 spin_unlock(&cifs_sb->tlink_tree_lock); 2930 spin_unlock(&cifs_sb->tlink_tree_lock);
2920 radix_tree_preload_end();
2921 2931
2922 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, 2932 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
2923 TLINK_IDLE_EXPIRE); 2933 TLINK_IDLE_EXPIRE);
@@ -3107,32 +3117,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
3107int 3117int
3108cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) 3118cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3109{ 3119{
3110 int i, ret; 3120 struct rb_root *root = &cifs_sb->tlink_tree;
3121 struct rb_node *node;
3122 struct tcon_link *tlink;
3111 char *tmp; 3123 char *tmp;
3112 struct tcon_link *tlink[8];
3113 unsigned long index = 0;
3114 3124
3115 cancel_delayed_work_sync(&cifs_sb->prune_tlinks); 3125 cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
3116 3126
3117 do { 3127 spin_lock(&cifs_sb->tlink_tree_lock);
3118 spin_lock(&cifs_sb->tlink_tree_lock); 3128 while ((node = rb_first(root))) {
3119 ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, 3129 tlink = rb_entry(node, struct tcon_link, tl_rbnode);
3120 (void **)tlink, index, 3130 cifs_get_tlink(tlink);
3121 ARRAY_SIZE(tlink)); 3131 clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
3122 /* increment index for next pass */ 3132 rb_erase(node, root);
3123 if (ret > 0)
3124 index = tlink[ret - 1]->tl_index + 1;
3125 for (i = 0; i < ret; i++) {
3126 cifs_get_tlink(tlink[i]);
3127 clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
3128 radix_tree_delete(&cifs_sb->tlink_tree,
3129 tlink[i]->tl_index);
3130 }
3131 spin_unlock(&cifs_sb->tlink_tree_lock);
3132 3133
3133 for (i = 0; i < ret; i++) 3134 spin_unlock(&cifs_sb->tlink_tree_lock);
3134 cifs_put_tlink(tlink[i]); 3135 cifs_put_tlink(tlink);
3135 } while (ret != 0); 3136 spin_lock(&cifs_sb->tlink_tree_lock);
3137 }
3138 spin_unlock(&cifs_sb->tlink_tree_lock);
3136 3139
3137 tmp = cifs_sb->prepath; 3140 tmp = cifs_sb->prepath;
3138 cifs_sb->prepathlen = 0; 3141 cifs_sb->prepathlen = 0;
@@ -3271,22 +3274,10 @@ out:
3271 return tcon; 3274 return tcon;
3272} 3275}
3273 3276
3274static struct tcon_link * 3277static inline struct tcon_link *
3275cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) 3278cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
3276{ 3279{
3277 struct tcon_link *tlink; 3280 return cifs_sb->master_tlink;
3278 unsigned int ret;
3279
3280 spin_lock(&cifs_sb->tlink_tree_lock);
3281 ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink,
3282 0, 1, CIFS_TLINK_MASTER_TAG);
3283 spin_unlock(&cifs_sb->tlink_tree_lock);
3284
3285 /* the master tcon should always be present */
3286 if (ret == 0)
3287 BUG();
3288
3289 return tlink;
3290} 3281}
3291 3282
3292struct cifsTconInfo * 3283struct cifsTconInfo *
@@ -3302,6 +3293,47 @@ cifs_sb_tcon_pending_wait(void *unused)
3302 return signal_pending(current) ? -ERESTARTSYS : 0; 3293 return signal_pending(current) ? -ERESTARTSYS : 0;
3303} 3294}
3304 3295
3296/* find and return a tlink with given uid */
3297static struct tcon_link *
3298tlink_rb_search(struct rb_root *root, uid_t uid)
3299{
3300 struct rb_node *node = root->rb_node;
3301 struct tcon_link *tlink;
3302
3303 while (node) {
3304 tlink = rb_entry(node, struct tcon_link, tl_rbnode);
3305
3306 if (tlink->tl_uid > uid)
3307 node = node->rb_left;
3308 else if (tlink->tl_uid < uid)
3309 node = node->rb_right;
3310 else
3311 return tlink;
3312 }
3313 return NULL;
3314}
3315
3316/* insert a tcon_link into the tree */
3317static void
3318tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
3319{
3320 struct rb_node **new = &(root->rb_node), *parent = NULL;
3321 struct tcon_link *tlink;
3322
3323 while (*new) {
3324 tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
3325 parent = *new;
3326
3327 if (tlink->tl_uid > new_tlink->tl_uid)
3328 new = &((*new)->rb_left);
3329 else
3330 new = &((*new)->rb_right);
3331 }
3332
3333 rb_link_node(&new_tlink->tl_rbnode, parent, new);
3334 rb_insert_color(&new_tlink->tl_rbnode, root);
3335}
3336
3305/* 3337/*
3306 * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the 3338 * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the
3307 * current task. 3339 * current task.
@@ -3309,7 +3341,7 @@ cifs_sb_tcon_pending_wait(void *unused)
3309 * If the superblock doesn't refer to a multiuser mount, then just return 3341 * If the superblock doesn't refer to a multiuser mount, then just return
3310 * the master tcon for the mount. 3342 * the master tcon for the mount.
3311 * 3343 *
3312 * First, search the radix tree for an existing tcon for this fsuid. If one 3344 * First, search the rbtree for an existing tcon for this fsuid. If one
3313 * exists, then check to see if it's pending construction. If it is then wait 3345 * exists, then check to see if it's pending construction. If it is then wait
3314 * for construction to complete. Once it's no longer pending, check to see if 3346 * for construction to complete. Once it's no longer pending, check to see if
3315 * it failed and either return an error or retry construction, depending on 3347 * it failed and either return an error or retry construction, depending on
@@ -3322,14 +3354,14 @@ struct tcon_link *
3322cifs_sb_tlink(struct cifs_sb_info *cifs_sb) 3354cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
3323{ 3355{
3324 int ret; 3356 int ret;
3325 unsigned long fsuid = (unsigned long) current_fsuid(); 3357 uid_t fsuid = current_fsuid();
3326 struct tcon_link *tlink, *newtlink; 3358 struct tcon_link *tlink, *newtlink;
3327 3359
3328 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 3360 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
3329 return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); 3361 return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
3330 3362
3331 spin_lock(&cifs_sb->tlink_tree_lock); 3363 spin_lock(&cifs_sb->tlink_tree_lock);
3332 tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); 3364 tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
3333 if (tlink) 3365 if (tlink)
3334 cifs_get_tlink(tlink); 3366 cifs_get_tlink(tlink);
3335 spin_unlock(&cifs_sb->tlink_tree_lock); 3367 spin_unlock(&cifs_sb->tlink_tree_lock);
@@ -3338,36 +3370,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
3338 newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL); 3370 newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
3339 if (newtlink == NULL) 3371 if (newtlink == NULL)
3340 return ERR_PTR(-ENOMEM); 3372 return ERR_PTR(-ENOMEM);
3341 newtlink->tl_index = fsuid; 3373 newtlink->tl_uid = fsuid;
3342 newtlink->tl_tcon = ERR_PTR(-EACCES); 3374 newtlink->tl_tcon = ERR_PTR(-EACCES);
3343 set_bit(TCON_LINK_PENDING, &newtlink->tl_flags); 3375 set_bit(TCON_LINK_PENDING, &newtlink->tl_flags);
3344 set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags); 3376 set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags);
3345 cifs_get_tlink(newtlink); 3377 cifs_get_tlink(newtlink);
3346 3378
3347 ret = radix_tree_preload(GFP_KERNEL);
3348 if (ret != 0) {
3349 kfree(newtlink);
3350 return ERR_PTR(ret);
3351 }
3352
3353 spin_lock(&cifs_sb->tlink_tree_lock); 3379 spin_lock(&cifs_sb->tlink_tree_lock);
3354 /* was one inserted after previous search? */ 3380 /* was one inserted after previous search? */
3355 tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); 3381 tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
3356 if (tlink) { 3382 if (tlink) {
3357 cifs_get_tlink(tlink); 3383 cifs_get_tlink(tlink);
3358 spin_unlock(&cifs_sb->tlink_tree_lock); 3384 spin_unlock(&cifs_sb->tlink_tree_lock);
3359 radix_tree_preload_end();
3360 kfree(newtlink); 3385 kfree(newtlink);
3361 goto wait_for_construction; 3386 goto wait_for_construction;
3362 } 3387 }
3363 ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink);
3364 spin_unlock(&cifs_sb->tlink_tree_lock);
3365 radix_tree_preload_end();
3366 if (ret) {
3367 kfree(newtlink);
3368 return ERR_PTR(ret);
3369 }
3370 tlink = newtlink; 3388 tlink = newtlink;
3389 tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
3390 spin_unlock(&cifs_sb->tlink_tree_lock);
3371 } else { 3391 } else {
3372wait_for_construction: 3392wait_for_construction:
3373 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, 3393 ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
@@ -3413,39 +3433,39 @@ cifs_prune_tlinks(struct work_struct *work)
3413{ 3433{
3414 struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info, 3434 struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
3415 prune_tlinks.work); 3435 prune_tlinks.work);
3416 struct tcon_link *tlink[8]; 3436 struct rb_root *root = &cifs_sb->tlink_tree;
3417 unsigned long now = jiffies; 3437 struct rb_node *node = rb_first(root);
3418 unsigned long index = 0; 3438 struct rb_node *tmp;
3419 int i, ret; 3439 struct tcon_link *tlink;
3420 3440
3421 do { 3441 /*
3422 spin_lock(&cifs_sb->tlink_tree_lock); 3442 * Because we drop the spinlock in the loop in order to put the tlink
3423 ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, 3443 * it's not guarded against removal of links from the tree. The only
3424 (void **)tlink, index, 3444 * places that remove entries from the tree are this function and
3425 ARRAY_SIZE(tlink)); 3445 * umounts. Because this function is non-reentrant and is canceled
3426 /* increment index for next pass */ 3446 * before umount can proceed, this is safe.
3427 if (ret > 0) 3447 */
3428 index = tlink[ret - 1]->tl_index + 1; 3448 spin_lock(&cifs_sb->tlink_tree_lock);
3429 for (i = 0; i < ret; i++) { 3449 node = rb_first(root);
3430 if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) || 3450 while (node != NULL) {
3431 atomic_read(&tlink[i]->tl_count) != 0 || 3451 tmp = node;
3432 time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE, 3452 node = rb_next(tmp);
3433 now)) { 3453 tlink = rb_entry(tmp, struct tcon_link, tl_rbnode);
3434 tlink[i] = NULL; 3454
3435 continue; 3455 if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) ||
3436 } 3456 atomic_read(&tlink->tl_count) != 0 ||
3437 cifs_get_tlink(tlink[i]); 3457 time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies))
3438 clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags); 3458 continue;
3439 radix_tree_delete(&cifs_sb->tlink_tree,
3440 tlink[i]->tl_index);
3441 }
3442 spin_unlock(&cifs_sb->tlink_tree_lock);
3443 3459
3444 for (i = 0; i < ret; i++) { 3460 cifs_get_tlink(tlink);
3445 if (tlink[i] != NULL) 3461 clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
3446 cifs_put_tlink(tlink[i]); 3462 rb_erase(tmp, root);
3447 } 3463
3448 } while (ret != 0); 3464 spin_unlock(&cifs_sb->tlink_tree_lock);
3465 cifs_put_tlink(tlink);
3466 spin_lock(&cifs_sb->tlink_tree_lock);
3467 }
3468 spin_unlock(&cifs_sb->tlink_tree_lock);
3449 3469
3450 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, 3470 queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
3451 TLINK_IDLE_EXPIRE); 3471 TLINK_IDLE_EXPIRE);
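
The connect.c changes above replace the radix tree of tcon_links (keyed by tl_index) with an rbtree keyed on the owning uid, which removes the radix_tree_preload() error paths. Below is a simplified userspace analogue of tlink_rb_search()/tlink_rb_insert() using an unbalanced binary search tree; the kernel version additionally rebalances through rb_link_node()/rb_insert_color() and holds tlink_tree_lock around both operations.

#include <stdio.h>
#include <sys/types.h>

/* Stand-in for struct tcon_link; only the search key is kept. */
struct demo_tlink {
        uid_t uid;
        struct demo_tlink *left, *right;
};

/* Mirrors the descent in tlink_rb_search(). */
static struct demo_tlink *demo_search(struct demo_tlink *node, uid_t uid)
{
        while (node) {
                if (node->uid > uid)
                        node = node->left;
                else if (node->uid < uid)
                        node = node->right;
                else
                        return node;    /* found the tlink for this uid */
        }
        return NULL;
}

/* Mirrors the descent in tlink_rb_insert(), minus the rebalancing. */
static void demo_insert(struct demo_tlink **root, struct demo_tlink *new)
{
        struct demo_tlink **link = root;

        while (*link) {
                if ((*link)->uid > new->uid)
                        link = &(*link)->left;
                else
                        link = &(*link)->right;
        }
        *link = new;
}

int main(void)
{
        struct demo_tlink a = { .uid = 1000 }, b = { .uid = 0 };
        struct demo_tlink *root = NULL;

        demo_insert(&root, &a);
        demo_insert(&root, &b);
        printf("uid 1000 %sfound\n", demo_search(root, 1000) ? "" : "not ");
        return 0;
}
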
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 3840eddbfb7a..db2a58c00f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon,
135 struct inode *newinode) 135 struct inode *newinode)
136{ 136{
137 if (tcon->nocase) 137 if (tcon->nocase)
138 direntry->d_op = &cifs_ci_dentry_ops; 138 d_set_d_op(direntry, &cifs_ci_dentry_ops);
139 else 139 else
140 direntry->d_op = &cifs_dentry_ops; 140 d_set_d_op(direntry, &cifs_dentry_ops);
141 d_instantiate(direntry, newinode); 141 d_instantiate(direntry, newinode);
142} 142}
143 143
@@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
421 rc = cifs_get_inode_info_unix(&newinode, full_path, 421 rc = cifs_get_inode_info_unix(&newinode, full_path,
422 inode->i_sb, xid); 422 inode->i_sb, xid);
423 if (pTcon->nocase) 423 if (pTcon->nocase)
424 direntry->d_op = &cifs_ci_dentry_ops; 424 d_set_d_op(direntry, &cifs_ci_dentry_ops);
425 else 425 else
426 direntry->d_op = &cifs_dentry_ops; 426 d_set_d_op(direntry, &cifs_dentry_ops);
427 427
428 if (rc == 0) 428 if (rc == 0)
429 d_instantiate(direntry, newinode); 429 d_instantiate(direntry, newinode);
@@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
604 604
605 if ((rc == 0) && (newInode != NULL)) { 605 if ((rc == 0) && (newInode != NULL)) {
606 if (pTcon->nocase) 606 if (pTcon->nocase)
607 direntry->d_op = &cifs_ci_dentry_ops; 607 d_set_d_op(direntry, &cifs_ci_dentry_ops);
608 else 608 else
609 direntry->d_op = &cifs_dentry_ops; 609 d_set_d_op(direntry, &cifs_dentry_ops);
610 d_add(direntry, newInode); 610 d_add(direntry, newInode);
611 if (posix_open) { 611 if (posix_open) {
612 filp = lookup_instantiate_filp(nd, direntry, 612 filp = lookup_instantiate_filp(nd, direntry,
@@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
634 rc = 0; 634 rc = 0;
635 direntry->d_time = jiffies; 635 direntry->d_time = jiffies;
636 if (pTcon->nocase) 636 if (pTcon->nocase)
637 direntry->d_op = &cifs_ci_dentry_ops; 637 d_set_d_op(direntry, &cifs_ci_dentry_ops);
638 else 638 else
639 direntry->d_op = &cifs_dentry_ops; 639 d_set_d_op(direntry, &cifs_dentry_ops);
640 d_add(direntry, NULL); 640 d_add(direntry, NULL);
641 /* if it was once a directory (but how can we tell?) we could do 641 /* if it was once a directory (but how can we tell?) we could do
642 shrink_dcache_parent(direntry); */ 642 shrink_dcache_parent(direntry); */
@@ -656,22 +656,37 @@ lookup_out:
656static int 656static int
657cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) 657cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
658{ 658{
659 int isValid = 1; 659 if (nd->flags & LOOKUP_RCU)
660 return -ECHILD;
660 661
661 if (direntry->d_inode) { 662 if (direntry->d_inode) {
662 if (cifs_revalidate_dentry(direntry)) 663 if (cifs_revalidate_dentry(direntry))
663 return 0; 664 return 0;
664 } else { 665 else
665 cFYI(1, "neg dentry 0x%p name = %s", 666 return 1;
666 direntry, direntry->d_name.name);
667 if (time_after(jiffies, direntry->d_time + HZ) ||
668 !lookupCacheEnabled) {
669 d_drop(direntry);
670 isValid = 0;
671 }
672 } 667 }
673 668
674 return isValid; 669 /*
670 * This may be nfsd (or something), anyway, we can't see the
671 * intent of this. So, since this can be for creation, drop it.
672 */
673 if (!nd)
674 return 0;
675
676 /*
677 * Drop the negative dentry, in order to make sure to use the
678 * case sensitive name which is specified by user if this is
679 * for creation.
680 */
681 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
682 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
683 return 0;
684 }
685
686 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
687 return 0;
688
689 return 1;
675} 690}
676 691
677/* static int cifs_d_delete(struct dentry *direntry) 692/* static int cifs_d_delete(struct dentry *direntry)
@@ -688,9 +703,10 @@ const struct dentry_operations cifs_dentry_ops = {
688/* d_delete: cifs_d_delete, */ /* not needed except for debugging */ 703/* d_delete: cifs_d_delete, */ /* not needed except for debugging */
689}; 704};
690 705
691static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) 706static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode,
707 struct qstr *q)
692{ 708{
693 struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; 709 struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
694 unsigned long hash; 710 unsigned long hash;
695 int i; 711 int i;
696 712
@@ -703,21 +719,16 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q)
703 return 0; 719 return 0;
704} 720}
705 721
706static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, 722static int cifs_ci_compare(const struct dentry *parent,
707 struct qstr *b) 723 const struct inode *pinode,
724 const struct dentry *dentry, const struct inode *inode,
725 unsigned int len, const char *str, const struct qstr *name)
708{ 726{
709 struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; 727 struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls;
710 728
711 if ((a->len == b->len) && 729 if ((name->len == len) &&
712 (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) { 730 (nls_strnicmp(codepage, name->name, str, len) == 0))
713 /*
714 * To preserve case, don't let an existing negative dentry's
715 * case take precedence. If a is not a negative dentry, this
716 * should have no side effects
717 */
718 memcpy((void *)a->name, b->name, a->len);
719 return 0; 731 return 0;
720 }
721 return 1; 732 return 1;
722} 733}
723 734
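
The dir.c hunks above rework cifs_d_revalidate() for RCU-walk (returning -ECHILD under LOOKUP_RCU) and adapt cifs_ci_hash()/cifs_ci_compare() to the new dentry_operations prototypes, where the candidate name arrives as a (len, str) pair next to the qstr. A rough userspace analogue of the case-insensitive comparison follows; the kernel compares through nls_strnicmp() with the mount's codepage, for which plain tolower() stands in here.

#include <stdio.h>
#include <ctype.h>

/* Stand-in for struct qstr. */
struct demo_qstr {
        const char *name;
        unsigned int len;
};

/* Return 0 on match, 1 on mismatch, as the d_compare contract expects. */
static int demo_ci_compare(unsigned int len, const char *str,
                           const struct demo_qstr *name)
{
        unsigned int i;

        if (name->len != len)
                return 1;
        for (i = 0; i < len; i++)
                if (tolower((unsigned char)str[i]) !=
                    tolower((unsigned char)name->name[i]))
                        return 1;
        return 0;
}

int main(void)
{
        struct demo_qstr q = { .name = "ReadMe.TXT", .len = 10 };

        printf("%d\n", demo_ci_compare(10, "readme.txt", &q));  /* 0: match */
        printf("%d\n", demo_ci_compare(9, "readme.tx", &q));    /* 1: length */
        return 0;
}
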
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 0eb87026cad3..548f06230a6d 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -66,7 +66,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
66 /* Search for server name delimiter */ 66 /* Search for server name delimiter */
67 sep = memchr(hostname, '\\', len); 67 sep = memchr(hostname, '\\', len);
68 if (sep) 68 if (sep)
69 len = sep - unc; 69 len = sep - hostname;
70 else 70 else
71 cFYI(1, "%s: probably server name is whole unc: %s", 71 cFYI(1, "%s: probably server name is whole unc: %s",
72 __func__, unc); 72 __func__, unc);
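
The one-line dns_resolve.c fix above matters because the hostname pointer has already been advanced past the leading "\\" of the UNC path; measuring the hostname length from the start of the UNC string instead makes it two bytes too long. A small standalone illustration:

#include <stdio.h>
#include <string.h>

int main(void)
{
        const char *unc = "\\\\server\\share";
        const char *hostname = unc + 2;                 /* skip leading "\\" */
        size_t len = strlen(hostname);
        const char *sep = memchr(hostname, '\\', len);  /* share delimiter */

        if (sep) {
                printf("old:   %zu bytes\n", (size_t)(sep - unc));      /* 8 */
                printf("fixed: %zu bytes\n", (size_t)(sep - hostname)); /* 6, "server" */
        }
        return 0;
}
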
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ae82159cf7fa..5a28660ca2b5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -146,12 +146,7 @@ client_can_cache:
146 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, 146 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
147 xid, NULL); 147 xid, NULL);
148 148
149 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 149 cifs_set_oplock_level(pCifsInode, oplock);
150 pCifsInode->clientCanCacheAll = true;
151 pCifsInode->clientCanCacheRead = true;
152 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
153 } else if ((oplock & 0xF) == OPLOCK_READ)
154 pCifsInode->clientCanCacheRead = true;
155 150
156 return rc; 151 return rc;
157} 152}
@@ -253,12 +248,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
253 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); 248 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
254 spin_unlock(&cifs_file_list_lock); 249 spin_unlock(&cifs_file_list_lock);
255 250
256 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 251 cifs_set_oplock_level(pCifsInode, oplock);
257 pCifsInode->clientCanCacheAll = true;
258 pCifsInode->clientCanCacheRead = true;
259 cFYI(1, "Exclusive Oplock inode %p", inode);
260 } else if ((oplock & 0xF) == OPLOCK_READ)
261 pCifsInode->clientCanCacheRead = true;
262 252
263 file->private_data = pCifsFile; 253 file->private_data = pCifsFile;
264 return pCifsFile; 254 return pCifsFile;
@@ -271,8 +261,9 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
271 */ 261 */
272void cifsFileInfo_put(struct cifsFileInfo *cifs_file) 262void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
273{ 263{
264 struct inode *inode = cifs_file->dentry->d_inode;
274 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); 265 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
275 struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode); 266 struct cifsInodeInfo *cifsi = CIFS_I(inode);
276 struct cifsLockInfo *li, *tmp; 267 struct cifsLockInfo *li, *tmp;
277 268
278 spin_lock(&cifs_file_list_lock); 269 spin_lock(&cifs_file_list_lock);
@@ -288,8 +279,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
288 if (list_empty(&cifsi->openFileList)) { 279 if (list_empty(&cifsi->openFileList)) {
289 cFYI(1, "closing last open instance for inode %p", 280 cFYI(1, "closing last open instance for inode %p",
290 cifs_file->dentry->d_inode); 281 cifs_file->dentry->d_inode);
291 cifsi->clientCanCacheRead = false; 282 cifs_set_oplock_level(cifsi, 0);
292 cifsi->clientCanCacheAll = false;
293 } 283 }
294 spin_unlock(&cifs_file_list_lock); 284 spin_unlock(&cifs_file_list_lock);
295 285
@@ -607,8 +597,6 @@ reopen_success:
607 rc = filemap_write_and_wait(inode->i_mapping); 597 rc = filemap_write_and_wait(inode->i_mapping);
608 mapping_set_error(inode->i_mapping, rc); 598 mapping_set_error(inode->i_mapping, rc);
609 599
610 pCifsInode->clientCanCacheAll = false;
611 pCifsInode->clientCanCacheRead = false;
612 if (tcon->unix_ext) 600 if (tcon->unix_ext)
613 rc = cifs_get_inode_info_unix(&inode, 601 rc = cifs_get_inode_info_unix(&inode,
614 full_path, inode->i_sb, xid); 602 full_path, inode->i_sb, xid);
@@ -622,18 +610,9 @@ reopen_success:
622 invalidate the current end of file on the server 610 invalidate the current end of file on the server
623 we can not go to the server to get the new inod 611 we can not go to the server to get the new inod
624 info */ 612 info */
625 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { 613
626 pCifsInode->clientCanCacheAll = true; 614 cifs_set_oplock_level(pCifsInode, oplock);
627 pCifsInode->clientCanCacheRead = true; 615
628 cFYI(1, "Exclusive Oplock granted on inode %p",
629 pCifsFile->dentry->d_inode);
630 } else if ((oplock & 0xF) == OPLOCK_READ) {
631 pCifsInode->clientCanCacheRead = true;
632 pCifsInode->clientCanCacheAll = false;
633 } else {
634 pCifsInode->clientCanCacheRead = false;
635 pCifsInode->clientCanCacheAll = false;
636 }
637 cifs_relock_file(pCifsFile); 616 cifs_relock_file(pCifsFile);
638 617
639reopen_error_exit: 618reopen_error_exit:
@@ -775,12 +754,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
775 754
776 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 755 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
777 tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink); 756 tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
778
779 if (file->private_data == NULL) {
780 rc = -EBADF;
781 FreeXid(xid);
782 return rc;
783 }
784 netfid = ((struct cifsFileInfo *)file->private_data)->netfid; 757 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
785 758
786 if ((tcon->ses->capabilities & CAP_UNIX) && 759 if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -956,6 +929,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
956ssize_t cifs_user_write(struct file *file, const char __user *write_data, 929ssize_t cifs_user_write(struct file *file, const char __user *write_data,
957 size_t write_size, loff_t *poffset) 930 size_t write_size, loff_t *poffset)
958{ 931{
932 struct inode *inode = file->f_path.dentry->d_inode;
959 int rc = 0; 933 int rc = 0;
960 unsigned int bytes_written = 0; 934 unsigned int bytes_written = 0;
961 unsigned int total_written; 935 unsigned int total_written;
@@ -963,7 +937,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
963 struct cifsTconInfo *pTcon; 937 struct cifsTconInfo *pTcon;
964 int xid, long_op; 938 int xid, long_op;
965 struct cifsFileInfo *open_file; 939 struct cifsFileInfo *open_file;
966 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode); 940 struct cifsInodeInfo *cifsi = CIFS_I(inode);
967 941
968 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 942 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
969 943
@@ -1029,21 +1003,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
1029 1003
1030 cifs_stats_bytes_written(pTcon, total_written); 1004 cifs_stats_bytes_written(pTcon, total_written);
1031 1005
1032 /* since the write may have blocked check these pointers again */
1033 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1034 struct inode *inode = file->f_path.dentry->d_inode;
1035/* Do not update local mtime - server will set its actual value on write 1006/* Do not update local mtime - server will set its actual value on write
1036 * inode->i_ctime = inode->i_mtime = 1007 * inode->i_ctime = inode->i_mtime =
1037 * current_fs_time(inode->i_sb);*/ 1008 * current_fs_time(inode->i_sb);*/
1038 if (total_written > 0) { 1009 if (total_written > 0) {
1039 spin_lock(&inode->i_lock); 1010 spin_lock(&inode->i_lock);
1040 if (*poffset > file->f_path.dentry->d_inode->i_size) 1011 if (*poffset > inode->i_size)
1041 i_size_write(file->f_path.dentry->d_inode, 1012 i_size_write(inode, *poffset);
1042 *poffset); 1013 spin_unlock(&inode->i_lock);
1043 spin_unlock(&inode->i_lock);
1044 }
1045 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1046 } 1014 }
1015 mark_inode_dirty_sync(inode);
1016
1047 FreeXid(xid); 1017 FreeXid(xid);
1048 return total_written; 1018 return total_written;
1049} 1019}
@@ -1138,7 +1108,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
1138 return total_written; 1108 return total_written;
1139} 1109}
1140 1110
1141#ifdef CONFIG_CIFS_EXPERIMENTAL
1142struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, 1111struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1143 bool fsuid_only) 1112 bool fsuid_only)
1144{ 1113{
@@ -1172,13 +1141,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1172 spin_unlock(&cifs_file_list_lock); 1141 spin_unlock(&cifs_file_list_lock);
1173 return NULL; 1142 return NULL;
1174} 1143}
1175#endif
1176 1144
1177struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, 1145struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1178 bool fsuid_only) 1146 bool fsuid_only)
1179{ 1147{
1180 struct cifsFileInfo *open_file; 1148 struct cifsFileInfo *open_file;
1181 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); 1149 struct cifs_sb_info *cifs_sb;
1182 bool any_available = false; 1150 bool any_available = false;
1183 int rc; 1151 int rc;
1184 1152
@@ -1192,6 +1160,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1192 return NULL; 1160 return NULL;
1193 } 1161 }
1194 1162
1163 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1164
1195 /* only filter by fsuid on multiuser mounts */ 1165 /* only filter by fsuid on multiuser mounts */
1196 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) 1166 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1197 fsuid_only = false; 1167 fsuid_only = false;
@@ -2299,8 +2269,10 @@ void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2299 2269
2300void cifs_oplock_break_put(struct cifsFileInfo *cfile) 2270void cifs_oplock_break_put(struct cifsFileInfo *cfile)
2301{ 2271{
2272 struct super_block *sb = cfile->dentry->d_sb;
2273
2302 cifsFileInfo_put(cfile); 2274 cifsFileInfo_put(cfile);
2303 cifs_sb_deactive(cfile->dentry->d_sb); 2275 cifs_sb_deactive(sb);
2304} 2276}
2305 2277
2306const struct address_space_operations cifs_addr_ops = { 2278const struct address_space_operations cifs_addr_ops = {
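
Among the file.c changes above, cifs_oplock_break_put() now caches cfile->dentry->d_sb in a local variable before calling cifsFileInfo_put(), because the put may drop the last reference and free the structure, making the old post-put dereference a potential use-after-free. A minimal sketch of that capture-before-release ordering, with invented demo_* types and the refcount reduced to a direct free():

#include <stdio.h>
#include <stdlib.h>

/* Invented minimal types; the real code deals with cifsFileInfo and d_sb. */
struct demo_sb   { int id; };
struct demo_file { struct demo_sb *sb; };

static void demo_put(struct demo_file *f)
{
        free(f);        /* last reference gone: f must not be touched again */
}

static void demo_break_put(struct demo_file *f)
{
        struct demo_sb *sb = f->sb;     /* capture before dropping the ref */

        demo_put(f);
        printf("deactivating sb %d\n", sb->id); /* safe: uses the local copy */
}

int main(void)
{
        struct demo_sb sb = { .id = 1 };
        struct demo_file *f = malloc(sizeof(*f));

        if (!f)
                return 1;
        f->sb = &sb;
        demo_break_put(f);
        return 0;
}
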
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index a2ad94efcfe6..297a43d0ff7f 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -2,7 +2,7 @@
2 * fs/cifs/fscache.c - CIFS filesystem cache interface 2 * fs/cifs/fscache.c - CIFS filesystem cache interface
3 * 3 *
4 * Copyright (c) 2010 Novell, Inc. 4 * Copyright (c) 2010 Novell, Inc.
5 * Author(s): Suresh Jayaraman (sjayaraman@suse.de> 5 * Author(s): Suresh Jayaraman <sjayaraman@suse.de>
6 * 6 *
7 * This library is free software; you can redistribute it and/or modify 7 * This library is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published 8 * it under the terms of the GNU Lesser General Public License as published
@@ -67,10 +67,12 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
67 if (cifsi->fscache) 67 if (cifsi->fscache)
68 return; 68 return;
69 69
70 cifsi->fscache = fscache_acquire_cookie(tcon->fscache, 70 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
71 cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
71 &cifs_fscache_inode_object_def, cifsi); 72 &cifs_fscache_inode_object_def, cifsi);
72 cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache, 73 cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
73 cifsi->fscache); 74 cifsi->fscache);
75 }
74} 76}
75 77
76void cifs_fscache_release_inode_cookie(struct inode *inode) 78void cifs_fscache_release_inode_cookie(struct inode *inode)
@@ -101,10 +103,8 @@ void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
101{ 103{
102 if ((filp->f_flags & O_ACCMODE) != O_RDONLY) 104 if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
103 cifs_fscache_disable_inode_cookie(inode); 105 cifs_fscache_disable_inode_cookie(inode);
104 else { 106 else
105 cifs_fscache_enable_inode_cookie(inode); 107 cifs_fscache_enable_inode_cookie(inode);
106 cFYI(1, "CIFS: fscache inode cookie set");
107 }
108} 108}
109 109
110void cifs_fscache_reset_inode_cookie(struct inode *inode) 110void cifs_fscache_reset_inode_cookie(struct inode *inode)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 39869c3c3efb..a853a89857a5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -686,13 +686,18 @@ int cifs_get_inode_info(struct inode **pinode,
686 cFYI(1, "cifs_sfu_type failed: %d", tmprc); 686 cFYI(1, "cifs_sfu_type failed: %d", tmprc);
687 } 687 }
688 688
689#ifdef CONFIG_CIFS_EXPERIMENTAL 689#ifdef CONFIG_CIFS_ACL
690 /* fill in 0777 bits from ACL */ 690 /* fill in 0777 bits from ACL */
691 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { 691 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
692 cFYI(1, "Getting mode bits from ACL"); 692 rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path,
693 cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid); 693 pfid);
694 if (rc) {
695 cFYI(1, "%s: Getting ACL failed with error: %d",
696 __func__, rc);
697 goto cgii_exit;
698 }
694 } 699 }
695#endif 700#endif /* CONFIG_CIFS_ACL */
696 701
697 /* fill in remaining high mode bits e.g. SUID, VTX */ 702 /* fill in remaining high mode bits e.g. SUID, VTX */
698 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) 703 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
@@ -723,12 +728,12 @@ static const struct inode_operations cifs_ipc_inode_ops = {
723 .lookup = cifs_lookup, 728 .lookup = cifs_lookup,
724}; 729};
725 730
726char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) 731char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
732 struct cifsTconInfo *tcon)
727{ 733{
728 int pplen = cifs_sb->prepathlen; 734 int pplen = cifs_sb->prepathlen;
729 int dfsplen; 735 int dfsplen;
730 char *full_path = NULL; 736 char *full_path = NULL;
731 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
732 737
733 /* if no prefix path, simply set path to the root of share to "" */ 738 /* if no prefix path, simply set path to the root of share to "" */
734 if (pplen == 0) { 739 if (pplen == 0) {
@@ -804,14 +809,14 @@ inode_has_hashed_dentries(struct inode *inode)
804{ 809{
805 struct dentry *dentry; 810 struct dentry *dentry;
806 811
807 spin_lock(&dcache_lock); 812 spin_lock(&inode->i_lock);
808 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 813 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
809 if (!d_unhashed(dentry) || IS_ROOT(dentry)) { 814 if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
810 spin_unlock(&dcache_lock); 815 spin_unlock(&inode->i_lock);
811 return true; 816 return true;
812 } 817 }
813 } 818 }
814 spin_unlock(&dcache_lock); 819 spin_unlock(&inode->i_lock);
815 return false; 820 return false;
816} 821}
817 822
@@ -870,7 +875,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
870 char *full_path; 875 char *full_path;
871 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); 876 struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
872 877
873 full_path = cifs_build_path_to_root(cifs_sb); 878 full_path = cifs_build_path_to_root(cifs_sb, tcon);
874 if (full_path == NULL) 879 if (full_path == NULL)
875 return ERR_PTR(-ENOMEM); 880 return ERR_PTR(-ENOMEM);
876 881
@@ -881,8 +886,10 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
881 rc = cifs_get_inode_info(&inode, full_path, NULL, sb, 886 rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
882 xid, NULL); 887 xid, NULL);
883 888
884 if (!inode) 889 if (!inode) {
885 return ERR_PTR(rc); 890 inode = ERR_PTR(rc);
891 goto out;
892 }
886 893
887#ifdef CONFIG_CIFS_FSCACHE 894#ifdef CONFIG_CIFS_FSCACHE
888 /* populate tcon->resource_id */ 895 /* populate tcon->resource_id */
@@ -898,13 +905,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
898 inode->i_uid = cifs_sb->mnt_uid; 905 inode->i_uid = cifs_sb->mnt_uid;
899 inode->i_gid = cifs_sb->mnt_gid; 906 inode->i_gid = cifs_sb->mnt_gid;
900 } else if (rc) { 907 } else if (rc) {
901 kfree(full_path);
902 _FreeXid(xid);
903 iget_failed(inode); 908 iget_failed(inode);
904 return ERR_PTR(rc); 909 inode = ERR_PTR(rc);
905 } 910 }
906 911
907 912out:
908 kfree(full_path); 913 kfree(full_path);
909 /* can not call macro FreeXid here since in a void func 914 /* can not call macro FreeXid here since in a void func
910 * TODO: This is no longer true 915 * TODO: This is no longer true
@@ -1314,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1314 to set uid/gid */ 1319 to set uid/gid */
1315 inc_nlink(inode); 1320 inc_nlink(inode);
1316 if (pTcon->nocase) 1321 if (pTcon->nocase)
1317 direntry->d_op = &cifs_ci_dentry_ops; 1322 d_set_d_op(direntry, &cifs_ci_dentry_ops);
1318 else 1323 else
1319 direntry->d_op = &cifs_dentry_ops; 1324 d_set_d_op(direntry, &cifs_dentry_ops);
1320 1325
1321 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); 1326 cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
1322 cifs_fill_uniqueid(inode->i_sb, &fattr); 1327 cifs_fill_uniqueid(inode->i_sb, &fattr);
@@ -1358,9 +1363,9 @@ mkdir_get_info:
1358 inode->i_sb, xid, NULL); 1363 inode->i_sb, xid, NULL);
1359 1364
1360 if (pTcon->nocase) 1365 if (pTcon->nocase)
1361 direntry->d_op = &cifs_ci_dentry_ops; 1366 d_set_d_op(direntry, &cifs_ci_dentry_ops);
1362 else 1367 else
1363 direntry->d_op = &cifs_dentry_ops; 1368 d_set_d_op(direntry, &cifs_dentry_ops);
1364 d_instantiate(direntry, newinode); 1369 d_instantiate(direntry, newinode);
1365 /* setting nlink not necessary except in cases where we 1370 /* setting nlink not necessary except in cases where we
1366 * failed to get it from the server or was set bogus */ 1371 * failed to get it from the server or was set bogus */
@@ -1648,6 +1653,7 @@ static bool
1648cifs_inode_needs_reval(struct inode *inode) 1653cifs_inode_needs_reval(struct inode *inode)
1649{ 1654{
1650 struct cifsInodeInfo *cifs_i = CIFS_I(inode); 1655 struct cifsInodeInfo *cifs_i = CIFS_I(inode);
1656 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1651 1657
1652 if (cifs_i->clientCanCacheRead) 1658 if (cifs_i->clientCanCacheRead)
1653 return false; 1659 return false;
@@ -1658,19 +1664,21 @@ cifs_inode_needs_reval(struct inode *inode)
1658 if (cifs_i->time == 0) 1664 if (cifs_i->time == 0)
1659 return true; 1665 return true;
1660 1666
1661 /* FIXME: the actimeo should be tunable */ 1667 if (!time_in_range(jiffies, cifs_i->time,
1662 if (time_after_eq(jiffies, cifs_i->time + HZ)) 1668 cifs_i->time + cifs_sb->actimeo))
1663 return true; 1669 return true;
1664 1670
1665 /* hardlinked files w/ noserverino get "special" treatment */ 1671 /* hardlinked files w/ noserverino get "special" treatment */
1666 if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) && 1672 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
1667 S_ISREG(inode->i_mode) && inode->i_nlink != 1) 1673 S_ISREG(inode->i_mode) && inode->i_nlink != 1)
1668 return true; 1674 return true;
1669 1675
1670 return false; 1676 return false;
1671} 1677}
1672 1678
1673/* check invalid_mapping flag and zap the cache if it's set */ 1679/*
1680 * Zap the cache. Called when invalid_mapping flag is set.
1681 */
1674static void 1682static void
1675cifs_invalidate_mapping(struct inode *inode) 1683cifs_invalidate_mapping(struct inode *inode)
1676{ 1684{
@@ -2114,11 +2122,16 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2114 2122
2115 if (attrs->ia_valid & ATTR_MODE) { 2123 if (attrs->ia_valid & ATTR_MODE) {
2116 rc = 0; 2124 rc = 0;
2117#ifdef CONFIG_CIFS_EXPERIMENTAL 2125#ifdef CONFIG_CIFS_ACL
2118 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 2126 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
2119 rc = mode_to_acl(inode, full_path, mode); 2127 rc = mode_to_cifs_acl(inode, full_path, mode);
2120 else 2128 if (rc) {
2121#endif 2129 cFYI(1, "%s: Setting ACL failed with error: %d",
2130 __func__, rc);
2131 goto cifs_setattr_exit;
2132 }
2133 } else
2134#endif /* CONFIG_CIFS_ACL */
2122 if (((mode & S_IWUGO) == 0) && 2135 if (((mode & S_IWUGO) == 0) &&
2123 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 2136 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
2124 2137
@@ -2177,7 +2190,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
2177 2190
2178 setattr_copy(inode, attrs); 2191 setattr_copy(inode, attrs);
2179 mark_inode_dirty(inode); 2192 mark_inode_dirty(inode);
2180 return 0;
2181 2193
2182cifs_setattr_exit: 2194cifs_setattr_exit:
2183 kfree(full_path); 2195 kfree(full_path);
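
The inode.c revalidation hunk above replaces the hard-coded one-second check with time_in_range(jiffies, cifs_i->time, cifs_i->time + cifs_sb->actimeo), so the mount-time actimeo value now controls attribute cache freshness. A userspace sketch of the wrap-safe comparison this relies on (the usual kernel signed-subtraction idiom, reproduced here under demo_* names):

#include <stdio.h>

/* Kernel-style wrap-safe time comparisons (signed-subtraction idiom). */
#define demo_time_after_eq(a, b)        ((long)((a) - (b)) >= 0)
#define demo_time_before_eq(a, b)       demo_time_after_eq(b, a)
#define demo_time_in_range(a, b, c) \
        (demo_time_after_eq(a, b) && demo_time_before_eq(a, c))

int main(void)
{
        unsigned long hz = 250;                         /* example HZ */
        unsigned long actimeo = 1 * hz;                 /* CIFS_DEF_ACTIMEO */
        unsigned long stamp = (unsigned long)-100;      /* just before wrap */
        unsigned long now = 50;                         /* after the wrap */

        /* Fresh as long as "now" lies inside [stamp, stamp + actimeo]. */
        printf("needs reval: %s\n",
               demo_time_in_range(now, stamp, stamp + actimeo) ? "no" : "yes");
        return 0;
}
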
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 077bf756f342..0c98672d0122 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
38 struct cifs_sb_info *cifs_sb; 38 struct cifs_sb_info *cifs_sb;
39#ifdef CONFIG_CIFS_POSIX 39#ifdef CONFIG_CIFS_POSIX
40 struct cifsFileInfo *pSMBFile = filep->private_data; 40 struct cifsFileInfo *pSMBFile = filep->private_data;
41 struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink); 41 struct cifsTconInfo *tcon;
42 __u64 ExtAttrBits = 0; 42 __u64 ExtAttrBits = 0;
43 __u64 ExtAttrMask = 0; 43 __u64 ExtAttrMask = 0;
44 __u64 caps = le64_to_cpu(tcon->fsUnixInfo.Capability); 44 __u64 caps;
45#endif /* CONFIG_CIFS_POSIX */ 45#endif /* CONFIG_CIFS_POSIX */
46 46
47 xid = GetXid(); 47 xid = GetXid();
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
62 break; 62 break;
63#ifdef CONFIG_CIFS_POSIX 63#ifdef CONFIG_CIFS_POSIX
64 case FS_IOC_GETFLAGS: 64 case FS_IOC_GETFLAGS:
65 if (pSMBFile == NULL)
66 break;
67 tcon = tlink_tcon(pSMBFile->tlink);
68 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
65 if (CIFS_UNIX_EXTATTR_CAP & caps) { 69 if (CIFS_UNIX_EXTATTR_CAP & caps) {
66 if (pSMBFile == NULL)
67 break;
68 rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid, 70 rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
69 &ExtAttrBits, &ExtAttrMask); 71 &ExtAttrBits, &ExtAttrMask);
70 if (rc == 0) 72 if (rc == 0)
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
75 break; 77 break;
76 78
77 case FS_IOC_SETFLAGS: 79 case FS_IOC_SETFLAGS:
80 if (pSMBFile == NULL)
81 break;
82 tcon = tlink_tcon(pSMBFile->tlink);
83 caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
78 if (CIFS_UNIX_EXTATTR_CAP & caps) { 84 if (CIFS_UNIX_EXTATTR_CAP & caps) {
79 if (get_user(ExtAttrBits, (int __user *)arg)) { 85 if (get_user(ExtAttrBits, (int __user *)arg)) {
80 rc = -EFAULT; 86 rc = -EFAULT;
81 break; 87 break;
82 } 88 }
83 if (pSMBFile == NULL)
84 break;
85 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid, 89 /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
86 extAttrBits, &ExtAttrMask);*/ 90 extAttrBits, &ExtAttrMask);*/
87 } 91 }
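
The ioctl.c change is an ordering fix: the old code derived tcon and caps from filep->private_data in the declarations, before the pSMBFile == NULL check, so FS_IOC_GETFLAGS/FS_IOC_SETFLAGS on a file without private data could dereference a NULL pointer; the check now comes first and the derivation is done per command. A generic illustration of the pattern (hypothetical types, not the CIFS structures):

#include <stddef.h>
#include <stdio.h>

struct session { unsigned long caps; };
struct filectx { struct session *sess; };

static long do_ioctl(struct filectx *ctx, unsigned int cmd)
{
        /* check the handle first ... */
        if (ctx == NULL)
                return -1;
        /* ... and only then reach through it */
        struct session *sess = ctx->sess;
        printf("cmd %u, caps %lx\n", cmd, sess->caps);
        return 0;
}

int main(void)
{
        struct session s = { .caps = 0xff };
        struct filectx f = { .sess = &s };

        do_ioctl(&f, 1);
        do_ioctl(NULL, 1);      /* safely rejected instead of crashing */
        return 0;
}
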
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index 85cdbf831e7b..fe2f6a93c49e 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
525 rc); 525 rc);
526 } else { 526 } else {
527 if (pTcon->nocase) 527 if (pTcon->nocase)
528 direntry->d_op = &cifs_ci_dentry_ops; 528 d_set_d_op(direntry, &cifs_ci_dentry_ops);
529 else 529 else
530 direntry->d_op = &cifs_dentry_ops; 530 d_set_d_op(direntry, &cifs_dentry_ops);
531 d_instantiate(direntry, newinode); 531 d_instantiate(direntry, newinode);
532 } 532 }
533 } 533 }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index c4e296fe3518..43f10281bc19 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,10 +569,9 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
569 569
570 cFYI(1, "file id match, oplock break"); 570 cFYI(1, "file id match, oplock break");
571 pCifsInode = CIFS_I(netfile->dentry->d_inode); 571 pCifsInode = CIFS_I(netfile->dentry->d_inode);
572 pCifsInode->clientCanCacheAll = false;
573 if (pSMB->OplockLevel == 0)
574 pCifsInode->clientCanCacheRead = false;
575 572
573 cifs_set_oplock_level(pCifsInode,
574 pSMB->OplockLevel);
576 /* 575 /*
577 * cifs_oplock_break_put() can't be called 576 * cifs_oplock_break_put() can't be called
578 * from here. Get reference after queueing 577 * from here. Get reference after queueing
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
722 cifs_sb_master_tcon(cifs_sb)->treeName); 721 cifs_sb_master_tcon(cifs_sb)->treeName);
723 } 722 }
724} 723}
724
725void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
726{
727 oplock &= 0xF;
728
729 if (oplock == OPLOCK_EXCLUSIVE) {
730 cinode->clientCanCacheAll = true;
731 cinode->clientCanCacheRead = true;
732 cFYI(1, "Exclusive Oplock granted on inode %p",
733 &cinode->vfs_inode);
734 } else if (oplock == OPLOCK_READ) {
735 cinode->clientCanCacheAll = false;
736 cinode->clientCanCacheRead = true;
737 cFYI(1, "Level II Oplock granted on inode %p",
738 &cinode->vfs_inode);
739 } else {
740 cinode->clientCanCacheAll = false;
741 cinode->clientCanCacheRead = false;
742 }
743}
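
cifs_set_oplock_level() centralizes the translation from the server-granted oplock level to the two client caching flags, so the oplock-break handler above (and any other caller) no longer flips clientCanCacheAll/clientCanCacheRead by hand. A standalone model of that mapping, with invented constant values standing in for the CIFS oplock levels:

#include <stdbool.h>
#include <stdio.h>

enum oplock { OPLOCK_NONE = 0, OPLOCK_LEVEL_II = 1, OPLOCK_EXCL = 8 };

struct cache_state { bool cache_all; bool cache_read; };

static void set_oplock_level(struct cache_state *cs, unsigned int oplock)
{
        oplock &= 0xF;                       /* only the low nibble matters */
        cs->cache_all  = (oplock == OPLOCK_EXCL);
        cs->cache_read = (oplock == OPLOCK_EXCL || oplock == OPLOCK_LEVEL_II);
}

int main(void)
{
        struct cache_state cs;
        unsigned int levels[] = { OPLOCK_EXCL, OPLOCK_LEVEL_II, OPLOCK_NONE };

        for (int i = 0; i < 3; i++) {
                set_oplock_level(&cs, levels[i]);
                printf("level %u -> all=%d read=%d\n",
                       levels[i], cs.cache_all, cs.cache_read);
        }
        return 0;
}
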
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ef7bb7b50f58..ec5b68e3b928 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
79 cFYI(1, "For %s", name->name); 79 cFYI(1, "For %s", name->name);
80 80
81 if (parent->d_op && parent->d_op->d_hash) 81 if (parent->d_op && parent->d_op->d_hash)
82 parent->d_op->d_hash(parent, name); 82 parent->d_op->d_hash(parent, parent->d_inode, name);
83 else 83 else
84 name->hash = full_name_hash(name->name, name->len); 84 name->hash = full_name_hash(name->name, name->len);
85 85
@@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
103 } 103 }
104 104
105 if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) 105 if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase)
106 dentry->d_op = &cifs_ci_dentry_ops; 106 d_set_d_op(dentry, &cifs_ci_dentry_ops);
107 else 107 else
108 dentry->d_op = &cifs_dentry_ops; 108 d_set_d_op(dentry, &cifs_dentry_ops);
109 109
110 alias = d_materialise_unique(dentry, inode); 110 alias = d_materialise_unique(dentry, inode);
111 if (alias != NULL) { 111 if (alias != NULL) {
@@ -226,26 +226,29 @@ static int initiate_cifs_search(const int xid, struct file *file)
226 char *full_path = NULL; 226 char *full_path = NULL;
227 struct cifsFileInfo *cifsFile; 227 struct cifsFileInfo *cifsFile;
228 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 228 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
229 struct tcon_link *tlink; 229 struct tcon_link *tlink = NULL;
230 struct cifsTconInfo *pTcon; 230 struct cifsTconInfo *pTcon;
231 231
232 tlink = cifs_sb_tlink(cifs_sb);
233 if (IS_ERR(tlink))
234 return PTR_ERR(tlink);
235 pTcon = tlink_tcon(tlink);
236
237 if (file->private_data == NULL)
238 file->private_data =
239 kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
240 if (file->private_data == NULL) { 232 if (file->private_data == NULL) {
241 rc = -ENOMEM; 233 tlink = cifs_sb_tlink(cifs_sb);
242 goto error_exit; 234 if (IS_ERR(tlink))
235 return PTR_ERR(tlink);
236
237 cifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
238 if (cifsFile == NULL) {
239 rc = -ENOMEM;
240 goto error_exit;
241 }
242 file->private_data = cifsFile;
243 cifsFile->tlink = cifs_get_tlink(tlink);
244 pTcon = tlink_tcon(tlink);
245 } else {
246 cifsFile = file->private_data;
247 pTcon = tlink_tcon(cifsFile->tlink);
243 } 248 }
244 249
245 cifsFile = file->private_data;
246 cifsFile->invalidHandle = true; 250 cifsFile->invalidHandle = true;
247 cifsFile->srch_inf.endOfSearch = false; 251 cifsFile->srch_inf.endOfSearch = false;
248 cifsFile->tlink = cifs_get_tlink(tlink);
249 252
250 full_path = build_path_from_dentry(file->f_path.dentry); 253 full_path = build_path_from_dentry(file->f_path.dentry);
251 if (full_path == NULL) { 254 if (full_path == NULL) {
@@ -756,18 +759,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
756 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, 759 rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
757 ino, fattr.cf_dtype); 760 ino, fattr.cf_dtype);
758 761
759 /*
760 * we can not return filldir errors to the caller since they are
761 * "normal" when the stat blocksize is too small - we return remapped
762 * error instead
763 *
764 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
765 * case already. Why should we be clobbering other errors from it?
766 */
767 if (rc) {
768 cFYI(1, "filldir rc = %d", rc);
769 rc = -EOVERFLOW;
770 }
771 dput(tmp_dentry); 762 dput(tmp_dentry);
772 return rc; 763 return rc;
773} 764}
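
The initiate_cifs_search() rework only obtains a tlink and allocates the cifsFileInfo when file->private_data does not exist yet; a later call reuses the existing private data and the tlink it already holds, where the old code unconditionally stored a fresh tlink reference over whatever was there. A stripped-down model of the allocate-on-first-use pattern, with hypothetical resource names:

#include <stdio.h>
#include <stdlib.h>

struct resource { int refcount; };
struct search   { struct resource *res; };
struct file     { struct search *priv; };

static struct resource *grab(struct resource *r) { r->refcount++; return r; }

static int start_search(struct file *f, struct resource *r)
{
        struct search *s;

        if (f->priv == NULL) {
                /* first call: allocate state and take exactly one reference */
                s = calloc(1, sizeof(*s));
                if (!s)
                        return -1;
                s->res = grab(r);
                f->priv = s;
        } else {
                /* later calls: reuse existing state, no extra reference */
                s = f->priv;
        }
        printf("searching, refcount=%d\n", s->res->refcount);
        return 0;
}

int main(void)
{
        struct resource r = { .refcount = 1 };
        struct file f = { .priv = NULL };

        start_search(&f, &r);   /* takes the reference              */
        start_search(&f, &r);   /* reuses it; refcount stays at 2   */
        free(f.priv);
        return 0;
}
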
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index a264b744bb41..eae2a1491608 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -30,10 +30,11 @@
30 30
31#define MAX_EA_VALUE_SIZE 65535 31#define MAX_EA_VALUE_SIZE 65535
32#define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib" 32#define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib"
33#define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
33#define CIFS_XATTR_USER_PREFIX "user." 34#define CIFS_XATTR_USER_PREFIX "user."
34#define CIFS_XATTR_SYSTEM_PREFIX "system." 35#define CIFS_XATTR_SYSTEM_PREFIX "system."
35#define CIFS_XATTR_OS2_PREFIX "os2." 36#define CIFS_XATTR_OS2_PREFIX "os2."
36#define CIFS_XATTR_SECURITY_PREFIX ".security" 37#define CIFS_XATTR_SECURITY_PREFIX "security."
37#define CIFS_XATTR_TRUSTED_PREFIX "trusted." 38#define CIFS_XATTR_TRUSTED_PREFIX "trusted."
38#define XATTR_TRUSTED_PREFIX_LEN 8 39#define XATTR_TRUSTED_PREFIX_LEN 8
39#define XATTR_SECURITY_PREFIX_LEN 9 40#define XATTR_SECURITY_PREFIX_LEN 9
@@ -277,29 +278,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
277 cifs_sb->local_nls, 278 cifs_sb->local_nls,
278 cifs_sb->mnt_cifs_flags & 279 cifs_sb->mnt_cifs_flags &
279 CIFS_MOUNT_MAP_SPECIAL_CHR); 280 CIFS_MOUNT_MAP_SPECIAL_CHR);
280#ifdef CONFIG_CIFS_EXPERIMENTAL
281 else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
282 __u16 fid;
283 int oplock = 0;
284 struct cifs_ntsd *pacl = NULL;
285 __u32 buflen = 0;
286 if (experimEnabled)
287 rc = CIFSSMBOpen(xid, pTcon, full_path,
288 FILE_OPEN, GENERIC_READ, 0, &fid,
289 &oplock, NULL, cifs_sb->local_nls,
290 cifs_sb->mnt_cifs_flags &
291 CIFS_MOUNT_MAP_SPECIAL_CHR);
292 /* else rc is EOPNOTSUPP from above */
293
294 if (rc == 0) {
295 rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, &pacl,
296 &buflen);
297 CIFSSMBClose(xid, pTcon, fid);
298 }
299 }
300#endif /* EXPERIMENTAL */
301#else 281#else
302 cFYI(1, "query POSIX ACL not supported yet"); 282 cFYI(1, "Query POSIX ACL not supported yet");
303#endif /* CONFIG_CIFS_POSIX */ 283#endif /* CONFIG_CIFS_POSIX */
304 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT, 284 } else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
305 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) { 285 strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -311,8 +291,33 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
311 cifs_sb->mnt_cifs_flags & 291 cifs_sb->mnt_cifs_flags &
312 CIFS_MOUNT_MAP_SPECIAL_CHR); 292 CIFS_MOUNT_MAP_SPECIAL_CHR);
313#else 293#else
314 cFYI(1, "query POSIX default ACL not supported yet"); 294 cFYI(1, "Query POSIX default ACL not supported yet");
315#endif 295#endif /* CONFIG_CIFS_POSIX */
296 } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
297 strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
298#ifdef CONFIG_CIFS_ACL
299 u32 acllen;
300 struct cifs_ntsd *pacl;
301
302 pacl = get_cifs_acl(cifs_sb, direntry->d_inode,
303 full_path, &acllen);
304 if (IS_ERR(pacl)) {
305 rc = PTR_ERR(pacl);
306 cERROR(1, "%s: error %zd getting sec desc",
307 __func__, rc);
308 } else {
309 if (ea_value) {
310 if (acllen > buf_size)
311 acllen = -ERANGE;
312 else
313 memcpy(ea_value, pacl, acllen);
314 }
315 rc = acllen;
316 kfree(pacl);
317 }
318#else
319 cFYI(1, "Query CIFS ACL not supported yet");
320#endif /* CONFIG_CIFS_ACL */
316 } else if (strncmp(ea_name, 321 } else if (strncmp(ea_name,
317 CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) { 322 CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
318 cFYI(1, "Trusted xattr namespace not supported yet"); 323 cFYI(1, "Trusted xattr namespace not supported yet");
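
With CONFIG_CIFS_ACL, cifs_getxattr() now serves the new "system.cifs_acl" name by fetching the raw NT security descriptor and copying as much as fits into the caller's buffer, following the usual getxattr contract (size query when no buffer is supplied, -ERANGE when the supplied buffer is too small). From userspace the standard two-call pattern applies; the mount path below is hypothetical:

#include <stdio.h>
#include <stdlib.h>
#include <sys/xattr.h>

int main(void)
{
        const char *path = "/mnt/cifs/somefile";     /* hypothetical CIFS mount */
        const char *name = "system.cifs_acl";
        ssize_t len = getxattr(path, name, NULL, 0); /* size query */

        if (len < 0) {
                perror("getxattr(size)");
                return 1;
        }

        void *buf = malloc(len);
        if (!buf)
                return 1;

        len = getxattr(path, name, buf, len);        /* actual fetch */
        if (len < 0)
                perror("getxattr(fetch)");
        else
                printf("security descriptor: %zd bytes\n", len);

        free(buf);
        return 0;
}
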
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 9060f08e70cf..5525e1c660fd 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
93 struct list_head *child; 93 struct list_head *child;
94 struct dentry *de; 94 struct dentry *de;
95 95
96 spin_lock(&dcache_lock); 96 spin_lock(&parent->d_lock);
97 list_for_each(child, &parent->d_subdirs) 97 list_for_each(child, &parent->d_subdirs)
98 { 98 {
99 de = list_entry(child, struct dentry, d_u.d_child); 99 de = list_entry(child, struct dentry, d_u.d_child);
@@ -102,7 +102,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
102 continue; 102 continue;
103 coda_flag_inode(de->d_inode, flag); 103 coda_flag_inode(de->d_inode, flag);
104 } 104 }
105 spin_unlock(&dcache_lock); 105 spin_unlock(&parent->d_lock);
106 return; 106 return;
107} 107}
108 108
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 5d8b35539601..29badd91360f 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -18,6 +18,7 @@
18#include <linux/errno.h> 18#include <linux/errno.h>
19#include <linux/string.h> 19#include <linux/string.h>
20#include <linux/spinlock.h> 20#include <linux/spinlock.h>
21#include <linux/namei.h>
21 22
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
23 24
@@ -47,7 +48,7 @@ static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
47 48
48/* dentry ops */ 49/* dentry ops */
49static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); 50static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd);
50static int coda_dentry_delete(struct dentry *); 51static int coda_dentry_delete(const struct dentry *);
51 52
52/* support routines */ 53/* support routines */
53static int coda_venus_readdir(struct file *coda_file, void *buf, 54static int coda_venus_readdir(struct file *coda_file, void *buf,
@@ -125,7 +126,7 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc
125 return ERR_PTR(error); 126 return ERR_PTR(error);
126 127
127exit: 128exit:
128 entry->d_op = &coda_dentry_operations; 129 d_set_d_op(entry, &coda_dentry_operations);
129 130
130 if (inode && (type & CODA_NOCACHE)) 131 if (inode && (type & CODA_NOCACHE))
131 coda_flag_inode(inode, C_VATTR | C_PURGE); 132 coda_flag_inode(inode, C_VATTR | C_PURGE);
@@ -134,10 +135,13 @@ exit:
134} 135}
135 136
136 137
137int coda_permission(struct inode *inode, int mask) 138int coda_permission(struct inode *inode, int mask, unsigned int flags)
138{ 139{
139 int error; 140 int error;
140 141
142 if (flags & IPERM_FLAG_RCU)
143 return -ECHILD;
144
141 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 145 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
142 146
143 if (!mask) 147 if (!mask)
@@ -541,9 +545,13 @@ out:
541/* called when a cache lookup succeeds */ 545/* called when a cache lookup succeeds */
542static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) 546static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
543{ 547{
544 struct inode *inode = de->d_inode; 548 struct inode *inode;
545 struct coda_inode_info *cii; 549 struct coda_inode_info *cii;
546 550
551 if (nd->flags & LOOKUP_RCU)
552 return -ECHILD;
553
554 inode = de->d_inode;
547 if (!inode || coda_isroot(inode)) 555 if (!inode || coda_isroot(inode))
548 goto out; 556 goto out;
549 if (is_bad_inode(inode)) 557 if (is_bad_inode(inode))
@@ -559,7 +567,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd)
559 if (cii->c_flags & C_FLUSH) 567 if (cii->c_flags & C_FLUSH)
560 coda_flag_inode_children(inode, C_FLUSH); 568 coda_flag_inode_children(inode, C_FLUSH);
561 569
562 if (atomic_read(&de->d_count) > 1) 570 if (de->d_count > 1)
563 /* pretend it's valid, but don't change the flags */ 571 /* pretend it's valid, but don't change the flags */
564 goto out; 572 goto out;
565 573
@@ -577,7 +585,7 @@ out:
577 * This is the callback from dput() when d_count is going to 0. 585 * This is the callback from dput() when d_count is going to 0.
578 * We use this to unhash dentries with bad inodes. 586 * We use this to unhash dentries with bad inodes.
579 */ 587 */
580static int coda_dentry_delete(struct dentry * dentry) 588static int coda_dentry_delete(const struct dentry * dentry)
581{ 589{
582 int flags; 590 int flags;
583 591
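
Both coda_permission() and coda_dentry_revalidate() now return -ECHILD when invoked in rcu-walk mode (IPERM_FLAG_RCU / LOOKUP_RCU): coda may need a blocking upcall for these checks, so it asks the VFS to drop out of rcu-walk and repeat the operation in ordinary ref-walk mode. A minimal model of that contract, with invented flag and error names:

#include <stdio.h>

#define WALK_RCU   0x1      /* stand-in for LOOKUP_RCU / IPERM_FLAG_RCU   */
#define ERR_RETRY  (-10)    /* stand-in for -ECHILD: "retry without RCU"  */

/* A check that may need to sleep refuses to run under RCU-walk. */
static int fs_permission(int mask, unsigned int flags)
{
        if (flags & WALK_RCU)
                return ERR_RETRY;
        /* slow path: may block, talk to a server, etc. */
        return 0;
}

/* Caller (the "VFS" in this model) falls back to the blocking mode. */
static int lookup(int mask)
{
        int rc = fs_permission(mask, WALK_RCU);   /* try the fast path      */
        if (rc == ERR_RETRY)
                rc = fs_permission(mask, 0);      /* redo in ref-walk mode  */
        return rc;
}

int main(void)
{
        printf("lookup() = %d\n", lookup(4));
        return 0;
}
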
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5ea57c8c7f97..50dc7d189f56 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -56,11 +56,18 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
56 return &ei->vfs_inode; 56 return &ei->vfs_inode;
57} 57}
58 58
59static void coda_destroy_inode(struct inode *inode) 59static void coda_i_callback(struct rcu_head *head)
60{ 60{
61 struct inode *inode = container_of(head, struct inode, i_rcu);
62 INIT_LIST_HEAD(&inode->i_dentry);
61 kmem_cache_free(coda_inode_cachep, ITOC(inode)); 63 kmem_cache_free(coda_inode_cachep, ITOC(inode));
62} 64}
63 65
66static void coda_destroy_inode(struct inode *inode)
67{
68 call_rcu(&inode->i_rcu, coda_i_callback);
69}
70
64static void init_once(void *foo) 71static void init_once(void *foo)
65{ 72{
66 struct coda_inode_info *ei = (struct coda_inode_info *) foo; 73 struct coda_inode_info *ei = (struct coda_inode_info *) foo;
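
coda_destroy_inode() now frees its inode through call_rcu(): the callback recovers the containing inode from the embedded rcu_head and the kmem_cache_free() happens only after a grace period, so lock-free rcu-walk lookups that are still inspecting the inode never touch freed memory. The container_of() step is the only non-obvious part; here is a self-contained userspace model (the deferred-free queue is a toy stand-in for RCU, not the real mechanism):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct cb_head { void (*func)(struct cb_head *); struct cb_head *next; };

struct inode_model {
        long ino;
        struct cb_head rcu;              /* embedded callback head */
};

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

static struct cb_head *pending;          /* toy "grace period" queue */

static void defer_free(struct cb_head *h, void (*f)(struct cb_head *))
{
        h->func = f;
        h->next = pending;
        pending = h;
}

static void run_deferred(void)           /* pretend the grace period ended */
{
        while (pending) {
                struct cb_head *h = pending;
                pending = h->next;
                h->func(h);
        }
}

static void inode_free_cb(struct cb_head *h)
{
        struct inode_model *inode = container_of(h, struct inode_model, rcu);
        printf("freeing inode %ld after grace period\n", inode->ino);
        free(inode);
}

int main(void)
{
        struct inode_model *i = malloc(sizeof(*i));
        if (!i)
                return 1;
        i->ino = 42;
        defer_free(&i->rcu, inode_free_cb);   /* like call_rcu() */
        run_deferred();
        return 0;
}
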
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 2fd89b5c5c7b..741f0bd03918 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
25 25
26/* pioctl ops */ 26/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask); 27static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
28static long coda_pioctl(struct file *filp, unsigned int cmd, 28static long coda_pioctl(struct file *filp, unsigned int cmd,
29 unsigned long user_data); 29 unsigned long user_data);
30 30
@@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = {
41}; 41};
42 42
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask) 44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
45{ 45{
46 if (flags & IPERM_FLAG_RCU)
47 return -ECHILD;
46 return (mask & MAY_EXEC) ? -EACCES : 0; 48 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 49}
48 50
diff --git a/fs/compat.c b/fs/compat.c
index c580c322fa6b..eb1740ac8c0a 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1350,6 +1350,10 @@ static int compat_count(compat_uptr_t __user *argv, int max)
1350 argv++; 1350 argv++;
1351 if (i++ >= max) 1351 if (i++ >= max)
1352 return -E2BIG; 1352 return -E2BIG;
1353
1354 if (fatal_signal_pending(current))
1355 return -ERESTARTNOHAND;
1356 cond_resched();
1353 } 1357 }
1354 } 1358 }
1355 return i; 1359 return i;
@@ -1391,6 +1395,12 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1391 while (len > 0) { 1395 while (len > 0) {
1392 int offset, bytes_to_copy; 1396 int offset, bytes_to_copy;
1393 1397
1398 if (fatal_signal_pending(current)) {
1399 ret = -ERESTARTNOHAND;
1400 goto out;
1401 }
1402 cond_resched();
1403
1394 offset = pos % PAGE_SIZE; 1404 offset = pos % PAGE_SIZE;
1395 if (offset == 0) 1405 if (offset == 0)
1396 offset = PAGE_SIZE; 1406 offset = PAGE_SIZE;
@@ -1407,18 +1417,8 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
1407 if (!kmapped_page || kpos != (pos & PAGE_MASK)) { 1417 if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
1408 struct page *page; 1418 struct page *page;
1409 1419
1410#ifdef CONFIG_STACK_GROWSUP 1420 page = get_arg_page(bprm, pos, 1);
1411 ret = expand_stack_downwards(bprm->vma, pos); 1421 if (!page) {
1412 if (ret < 0) {
1413 /* We've exceed the stack rlimit. */
1414 ret = -E2BIG;
1415 goto out;
1416 }
1417#endif
1418 ret = get_user_pages(current, bprm->mm, pos,
1419 1, 1, 1, &page, NULL);
1420 if (ret <= 0) {
1421 /* We've exceed the stack rlimit. */
1422 ret = -E2BIG; 1422 ret = -E2BIG;
1423 goto out; 1423 goto out;
1424 } 1424 }
@@ -1539,8 +1539,10 @@ int compat_do_execve(char * filename,
1539 return retval; 1539 return retval;
1540 1540
1541out: 1541out:
1542 if (bprm->mm) 1542 if (bprm->mm) {
1543 acct_arg_size(bprm, 0);
1543 mmput(bprm->mm); 1544 mmput(bprm->mm);
1545 }
1544 1546
1545out_file: 1547out_file:
1546 if (bprm->file) { 1548 if (bprm->file) {
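
The compat_count()/compat_copy_strings() hunks mirror the native exec path: while counting and copying a potentially enormous argv the task now checks fatal_signal_pending() and calls cond_resched(), so a pathological exec can be killed and does not monopolize the CPU, and the error path uncharges the argument pages via acct_arg_size(bprm, 0) before mmput(). The loop shape, modelled with a plain signal flag in userspace:

#include <sched.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t got_fatal;

static void on_term(int sig) { (void)sig; got_fatal = 1; }

static long count_args(long nargs)
{
        for (long i = 0; i < nargs; i++) {
                if (got_fatal)
                        return -1;      /* bail out promptly when killed */
                /* sched_yield() plays the role of cond_resched() here */
                if ((i & 0xffff) == 0)
                        sched_yield();
        }
        return nargs;
}

int main(void)
{
        signal(SIGTERM, on_term);
        printf("counted %ld\n", count_args(1 << 20));
        return 0;
}
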
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 410ed188faa1..61abb638b4bf 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,7 +19,6 @@
19#include <linux/compiler.h> 19#include <linux/compiler.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/smp.h> 21#include <linux/smp.h>
22#include <linux/smp_lock.h>
23#include <linux/ioctl.h> 22#include <linux/ioctl.h>
24#include <linux/if.h> 23#include <linux/if.h>
25#include <linux/if_bridge.h> 24#include <linux/if_bridge.h>
@@ -43,7 +42,7 @@
43#include <linux/tty.h> 42#include <linux/tty.h>
44#include <linux/vt_kern.h> 43#include <linux/vt_kern.h>
45#include <linux/fb.h> 44#include <linux/fb.h>
46#include <linux/videodev.h> 45#include <linux/videodev2.h>
47#include <linux/netdevice.h> 46#include <linux/netdevice.h>
48#include <linux/raw.h> 47#include <linux/raw.h>
49#include <linux/blkdev.h> 48#include <linux/blkdev.h>
@@ -837,6 +836,7 @@ COMPATIBLE_IOCTL(TCSETSW)
837COMPATIBLE_IOCTL(TCSETSF) 836COMPATIBLE_IOCTL(TCSETSF)
838COMPATIBLE_IOCTL(TIOCLINUX) 837COMPATIBLE_IOCTL(TIOCLINUX)
839COMPATIBLE_IOCTL(TIOCSBRK) 838COMPATIBLE_IOCTL(TIOCSBRK)
839COMPATIBLE_IOCTL(TIOCGDEV)
840COMPATIBLE_IOCTL(TIOCCBRK) 840COMPATIBLE_IOCTL(TIOCCBRK)
841COMPATIBLE_IOCTL(TIOCGSID) 841COMPATIBLE_IOCTL(TIOCGSID)
842COMPATIBLE_IOCTL(TIOCGICOUNT) 842COMPATIBLE_IOCTL(TIOCGICOUNT)
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da6061a6df40..026cf68553a4 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -120,7 +120,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
120{ 120{
121 struct config_item * item = NULL; 121 struct config_item * item = NULL;
122 122
123 spin_lock(&dcache_lock); 123 spin_lock(&dentry->d_lock);
124 if (!d_unhashed(dentry)) { 124 if (!d_unhashed(dentry)) {
125 struct configfs_dirent * sd = dentry->d_fsdata; 125 struct configfs_dirent * sd = dentry->d_fsdata;
126 if (sd->s_type & CONFIGFS_ITEM_LINK) { 126 if (sd->s_type & CONFIGFS_ITEM_LINK) {
@@ -129,7 +129,7 @@ static inline struct config_item *configfs_get_config_item(struct dentry *dentry
129 } else 129 } else
130 item = config_item_get(sd->s_element); 130 item = config_item_get(sd->s_element);
131 } 131 }
132 spin_unlock(&dcache_lock); 132 spin_unlock(&dentry->d_lock);
133 133
134 return item; 134 return item;
135} 135}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 0b502f80c691..36637a8c1ed3 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -67,7 +67,7 @@ static void configfs_d_iput(struct dentry * dentry,
67 * We _must_ delete our dentries on last dput, as the chain-to-parent 67 * We _must_ delete our dentries on last dput, as the chain-to-parent
68 * behavior is required to clear the parents of default_groups. 68 * behavior is required to clear the parents of default_groups.
69 */ 69 */
70static int configfs_d_delete(struct dentry *dentry) 70static int configfs_d_delete(const struct dentry *dentry)
71{ 71{
72 return 1; 72 return 1;
73} 73}
@@ -232,10 +232,8 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
232 232
233 sd->s_mode = mode; 233 sd->s_mode = mode;
234 sd->s_dentry = dentry; 234 sd->s_dentry = dentry;
235 if (dentry) { 235 if (dentry)
236 dentry->d_fsdata = configfs_get(sd); 236 dentry->d_fsdata = configfs_get(sd);
237 dentry->d_op = &configfs_dentry_ops;
238 }
239 237
240 return 0; 238 return 0;
241} 239}
@@ -278,7 +276,6 @@ static int create_dir(struct config_item * k, struct dentry * p,
278 error = configfs_create(d, mode, init_dir); 276 error = configfs_create(d, mode, init_dir);
279 if (!error) { 277 if (!error) {
280 inc_nlink(p->d_inode); 278 inc_nlink(p->d_inode);
281 (d)->d_op = &configfs_dentry_ops;
282 } else { 279 } else {
283 struct configfs_dirent *sd = d->d_fsdata; 280 struct configfs_dirent *sd = d->d_fsdata;
284 if (sd) { 281 if (sd) {
@@ -371,9 +368,7 @@ int configfs_create_link(struct configfs_symlink *sl,
371 CONFIGFS_ITEM_LINK); 368 CONFIGFS_ITEM_LINK);
372 if (!err) { 369 if (!err) {
373 err = configfs_create(dentry, mode, init_symlink); 370 err = configfs_create(dentry, mode, init_symlink);
374 if (!err) 371 if (err) {
375 dentry->d_op = &configfs_dentry_ops;
376 else {
377 struct configfs_dirent *sd = dentry->d_fsdata; 372 struct configfs_dirent *sd = dentry->d_fsdata;
378 if (sd) { 373 if (sd) {
379 spin_lock(&configfs_dirent_lock); 374 spin_lock(&configfs_dirent_lock);
@@ -399,8 +394,7 @@ static void remove_dir(struct dentry * d)
399 if (d->d_inode) 394 if (d->d_inode)
400 simple_rmdir(parent->d_inode,d); 395 simple_rmdir(parent->d_inode,d);
401 396
402 pr_debug(" o %s removing done (%d)\n",d->d_name.name, 397 pr_debug(" o %s removing done (%d)\n",d->d_name.name, d->d_count);
403 atomic_read(&d->d_count));
404 398
405 dput(parent); 399 dput(parent);
406} 400}
@@ -448,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
448 return error; 442 return error;
449 } 443 }
450 444
451 dentry->d_op = &configfs_dentry_ops; 445 d_set_d_op(dentry, &configfs_dentry_ops);
452 d_rehash(dentry); 446 d_rehash(dentry);
453 447
454 return 0; 448 return 0;
@@ -493,7 +487,11 @@ static struct dentry * configfs_lookup(struct inode *dir,
493 * If it doesn't exist and it isn't a NOT_PINNED item, 487 * If it doesn't exist and it isn't a NOT_PINNED item,
494 * it must be negative. 488 * it must be negative.
495 */ 489 */
496 return simple_lookup(dir, dentry, nd); 490 if (dentry->d_name.len > NAME_MAX)
491 return ERR_PTR(-ENAMETOOLONG);
492 d_set_d_op(dentry, &configfs_dentry_ops);
493 d_add(dentry, NULL);
494 return NULL;
497 } 495 }
498 496
499out: 497out:
@@ -685,6 +683,7 @@ static int create_default_group(struct config_group *parent_group,
685 ret = -ENOMEM; 683 ret = -ENOMEM;
686 child = d_alloc(parent, &name); 684 child = d_alloc(parent, &name);
687 if (child) { 685 if (child) {
686 d_set_d_op(child, &configfs_dentry_ops);
688 d_add(child, NULL); 687 d_add(child, NULL);
689 688
690 ret = configfs_attach_group(&parent_group->cg_item, 689 ret = configfs_attach_group(&parent_group->cg_item,
@@ -1682,6 +1681,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1682 err = -ENOMEM; 1681 err = -ENOMEM;
1683 dentry = d_alloc(configfs_sb->s_root, &name); 1682 dentry = d_alloc(configfs_sb->s_root, &name);
1684 if (dentry) { 1683 if (dentry) {
1684 d_set_d_op(dentry, &configfs_dentry_ops);
1685 d_add(dentry, NULL); 1685 d_add(dentry, NULL);
1686 1686
1687 err = configfs_attach_group(sd->s_element, &group->cg_item, 1687 err = configfs_attach_group(sd->s_element, &group->cg_item,
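
configfs now attaches its dentry operations with d_set_d_op() at each site that is about to hash the dentry (attribute lookup, negative lookup, default groups, subsystem registration) instead of in configfs_make_dirent()/create_dir(). Under rcu-walk a dentry becomes visible to lock-free lookups the moment it is hashed, so everything a reader may consult, d_op included, must be in place before publication. The same publish-after-init rule in a standalone C11 sketch:

#include <stdatomic.h>
#include <stdio.h>

struct ops    { const char *name; };
struct dentry { const struct ops *ops; };

static _Atomic(struct dentry *) hash_slot;   /* what lock-free readers see */

static void publish(struct dentry *d, const struct ops *ops)
{
        d->ops = ops;                                    /* init first ...  */
        atomic_store_explicit(&hash_slot, d,             /* ... then expose */
                              memory_order_release);
}

static void reader(void)
{
        struct dentry *d = atomic_load_explicit(&hash_slot,
                                                memory_order_acquire);
        if (d)
                printf("reader sees ops: %s\n", d->ops->name);
}

int main(void)
{
        static const struct ops my_ops = { "configfs_dentry_ops (model)" };
        static struct dentry d;

        publish(&d, &my_ops);
        reader();
        return 0;
}

The dput() hunk further down makes the same point from the other side: the d_op->d_delete presence check becomes a DCACHE_OP_DELETE flag test, which only works if the operations are wired up through d_set_d_op() rather than by assigning d_op directly.
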
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 253476d78ed8..c83f4768eeaa 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -250,18 +250,14 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
250 struct dentry * dentry = sd->s_dentry; 250 struct dentry * dentry = sd->s_dentry;
251 251
252 if (dentry) { 252 if (dentry) {
253 spin_lock(&dcache_lock);
254 spin_lock(&dentry->d_lock); 253 spin_lock(&dentry->d_lock);
255 if (!(d_unhashed(dentry) && dentry->d_inode)) { 254 if (!(d_unhashed(dentry) && dentry->d_inode)) {
256 dget_locked(dentry); 255 dget_dlock(dentry);
257 __d_drop(dentry); 256 __d_drop(dentry);
258 spin_unlock(&dentry->d_lock); 257 spin_unlock(&dentry->d_lock);
259 spin_unlock(&dcache_lock);
260 simple_unlink(parent->d_inode, dentry); 258 simple_unlink(parent->d_inode, dentry);
261 } else { 259 } else
262 spin_unlock(&dentry->d_lock); 260 spin_unlock(&dentry->d_lock);
263 spin_unlock(&dcache_lock);
264 }
265 } 261 }
266} 262}
267 263
diff --git a/fs/dcache.c b/fs/dcache.c
index 23702a9d4e6d..5699d4c027cb 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -33,20 +33,58 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h> 35#include <linux/hardirq.h>
36#include <linux/bit_spinlock.h>
37#include <linux/rculist_bl.h>
36#include "internal.h" 38#include "internal.h"
37 39
40/*
41 * Usage:
42 * dcache->d_inode->i_lock protects:
43 * - i_dentry, d_alias, d_inode of aliases
44 * dcache_hash_bucket lock protects:
45 * - the dcache hash table
46 * s_anon bl list spinlock protects:
47 * - the s_anon list (see __d_drop)
48 * dcache_lru_lock protects:
49 * - the dcache lru lists and counters
50 * d_lock protects:
51 * - d_flags
52 * - d_name
53 * - d_lru
54 * - d_count
55 * - d_unhashed()
56 * - d_parent and d_subdirs
 57 * - children's d_child and d_parent
58 * - d_alias, d_inode
59 *
60 * Ordering:
61 * dentry->d_inode->i_lock
62 * dentry->d_lock
63 * dcache_lru_lock
64 * dcache_hash_bucket lock
65 * s_anon lock
66 *
67 * If there is an ancestor relationship:
68 * dentry->d_parent->...->d_parent->d_lock
69 * ...
70 * dentry->d_parent->d_lock
71 * dentry->d_lock
72 *
73 * If no ancestor relationship:
74 * if (dentry1 < dentry2)
75 * dentry1->d_lock
76 * dentry2->d_lock
77 */
38int sysctl_vfs_cache_pressure __read_mostly = 100; 78int sysctl_vfs_cache_pressure __read_mostly = 100;
39EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 79EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
40 80
41 __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lock); 81static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
42__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock); 82__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
43 83
44EXPORT_SYMBOL(dcache_lock); 84EXPORT_SYMBOL(rename_lock);
45 85
46static struct kmem_cache *dentry_cache __read_mostly; 86static struct kmem_cache *dentry_cache __read_mostly;
47 87
48#define DNAME_INLINE_LEN (sizeof(struct dentry)-offsetof(struct dentry,d_iname))
49
50/* 88/*
51 * This is the single most critical data structure when it comes 89 * This is the single most critical data structure when it comes
52 * to the dcache: the hashtable for lookups. Somebody should try 90 * to the dcache: the hashtable for lookups. Somebody should try
@@ -60,22 +98,51 @@ static struct kmem_cache *dentry_cache __read_mostly;
60 98
61static unsigned int d_hash_mask __read_mostly; 99static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 100static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 101
102struct dcache_hash_bucket {
103 struct hlist_bl_head head;
104};
105static struct dcache_hash_bucket *dentry_hashtable __read_mostly;
106
107static inline struct dcache_hash_bucket *d_hash(struct dentry *parent,
108 unsigned long hash)
109{
110 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
111 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
112 return dentry_hashtable + (hash & D_HASHMASK);
113}
114
115static inline void spin_lock_bucket(struct dcache_hash_bucket *b)
116{
117 bit_spin_lock(0, (unsigned long *)&b->head.first);
118}
119
120static inline void spin_unlock_bucket(struct dcache_hash_bucket *b)
121{
122 __bit_spin_unlock(0, (unsigned long *)&b->head.first);
123}
64 124
65/* Statistics gathering. */ 125/* Statistics gathering. */
66struct dentry_stat_t dentry_stat = { 126struct dentry_stat_t dentry_stat = {
67 .age_limit = 45, 127 .age_limit = 45,
68}; 128};
69 129
70static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; 130static DEFINE_PER_CPU(unsigned int, nr_dentry);
71static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
72 131
73#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) 132#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
133static int get_nr_dentry(void)
134{
135 int i;
136 int sum = 0;
137 for_each_possible_cpu(i)
138 sum += per_cpu(nr_dentry, i);
139 return sum < 0 ? 0 : sum;
140}
141
74int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, 142int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
75 size_t *lenp, loff_t *ppos) 143 size_t *lenp, loff_t *ppos)
76{ 144{
77 dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); 145 dentry_stat.nr_dentry = get_nr_dentry();
78 dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
79 return proc_dointvec(table, write, buffer, lenp, ppos); 146 return proc_dointvec(table, write, buffer, lenp, ppos);
80} 147}
81#endif 148#endif
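
nr_dentry moves from a percpu_counter to a plain DEFINE_PER_CPU(unsigned int) adjusted with this_cpu_* operations (this_cpu_dec() is visible in d_free() below) and only summed when the dentry-state sysctl is read. Allocations and frees can land on different CPUs, so individual per-CPU values may go negative and the unsynchronized sum is only approximate, hence the clamp to zero in get_nr_dentry(). A toy model of that summation:

#include <stdio.h>

#define NR_CPUS 4

static int nr_dentry[NR_CPUS];          /* per-"CPU" counters */

static void inc_on(int cpu) { nr_dentry[cpu]++; }
static void dec_on(int cpu) { nr_dentry[cpu]--; }

static int get_nr_dentry(void)
{
        int sum = 0;
        for (int i = 0; i < NR_CPUS; i++)
                sum += nr_dentry[i];
        return sum < 0 ? 0 : sum;       /* transient skew can go negative */
}

int main(void)
{
        inc_on(0); inc_on(0); inc_on(1);   /* 3 allocations             */
        dec_on(2); dec_on(3);              /* 2 frees, on other "CPUs"  */
        printf("approx nr_dentry = %d\n", get_nr_dentry());
        return 0;
}
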
@@ -91,35 +158,50 @@ static void __d_free(struct rcu_head *head)
91} 158}
92 159
93/* 160/*
94 * no dcache_lock, please. 161 * no locks, please.
95 */ 162 */
96static void d_free(struct dentry *dentry) 163static void d_free(struct dentry *dentry)
97{ 164{
98 percpu_counter_dec(&nr_dentry); 165 BUG_ON(dentry->d_count);
166 this_cpu_dec(nr_dentry);
99 if (dentry->d_op && dentry->d_op->d_release) 167 if (dentry->d_op && dentry->d_op->d_release)
100 dentry->d_op->d_release(dentry); 168 dentry->d_op->d_release(dentry);
101 169
102 /* if dentry was never inserted into hash, immediate free is OK */ 170 /* if dentry was never inserted into hash, immediate free is OK */
103 if (hlist_unhashed(&dentry->d_hash)) 171 if (hlist_bl_unhashed(&dentry->d_hash))
104 __d_free(&dentry->d_u.d_rcu); 172 __d_free(&dentry->d_u.d_rcu);
105 else 173 else
106 call_rcu(&dentry->d_u.d_rcu, __d_free); 174 call_rcu(&dentry->d_u.d_rcu, __d_free);
107} 175}
108 176
177/**
178 * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
179 * After this call, in-progress rcu-walk path lookup will fail. This
180 * should be called after unhashing, and after changing d_inode (if
181 * the dentry has not already been unhashed).
182 */
183static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
184{
185 assert_spin_locked(&dentry->d_lock);
186 /* Go through a barrier */
187 write_seqcount_barrier(&dentry->d_seq);
188}
189
109/* 190/*
110 * Release the dentry's inode, using the filesystem 191 * Release the dentry's inode, using the filesystem
111 * d_iput() operation if defined. 192 * d_iput() operation if defined. Dentry has no refcount
193 * and is unhashed.
112 */ 194 */
113static void dentry_iput(struct dentry * dentry) 195static void dentry_iput(struct dentry * dentry)
114 __releases(dentry->d_lock) 196 __releases(dentry->d_lock)
115 __releases(dcache_lock) 197 __releases(dentry->d_inode->i_lock)
116{ 198{
117 struct inode *inode = dentry->d_inode; 199 struct inode *inode = dentry->d_inode;
118 if (inode) { 200 if (inode) {
119 dentry->d_inode = NULL; 201 dentry->d_inode = NULL;
120 list_del_init(&dentry->d_alias); 202 list_del_init(&dentry->d_alias);
121 spin_unlock(&dentry->d_lock); 203 spin_unlock(&dentry->d_lock);
122 spin_unlock(&dcache_lock); 204 spin_unlock(&inode->i_lock);
123 if (!inode->i_nlink) 205 if (!inode->i_nlink)
124 fsnotify_inoderemove(inode); 206 fsnotify_inoderemove(inode);
125 if (dentry->d_op && dentry->d_op->d_iput) 207 if (dentry->d_op && dentry->d_op->d_iput)
@@ -128,40 +210,72 @@ static void dentry_iput(struct dentry * dentry)
128 iput(inode); 210 iput(inode);
129 } else { 211 } else {
130 spin_unlock(&dentry->d_lock); 212 spin_unlock(&dentry->d_lock);
131 spin_unlock(&dcache_lock);
132 } 213 }
133} 214}
134 215
135/* 216/*
136 * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. 217 * Release the dentry's inode, using the filesystem
218 * d_iput() operation if defined. dentry remains in-use.
219 */
220static void dentry_unlink_inode(struct dentry * dentry)
221 __releases(dentry->d_lock)
222 __releases(dentry->d_inode->i_lock)
223{
224 struct inode *inode = dentry->d_inode;
225 dentry->d_inode = NULL;
226 list_del_init(&dentry->d_alias);
227 dentry_rcuwalk_barrier(dentry);
228 spin_unlock(&dentry->d_lock);
229 spin_unlock(&inode->i_lock);
230 if (!inode->i_nlink)
231 fsnotify_inoderemove(inode);
232 if (dentry->d_op && dentry->d_op->d_iput)
233 dentry->d_op->d_iput(dentry, inode);
234 else
235 iput(inode);
236}
237
238/*
239 * dentry_lru_(add|del|move_tail) must be called with d_lock held.
137 */ 240 */
138static void dentry_lru_add(struct dentry *dentry) 241static void dentry_lru_add(struct dentry *dentry)
139{ 242{
140 if (list_empty(&dentry->d_lru)) { 243 if (list_empty(&dentry->d_lru)) {
244 spin_lock(&dcache_lru_lock);
141 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 245 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
142 dentry->d_sb->s_nr_dentry_unused++; 246 dentry->d_sb->s_nr_dentry_unused++;
143 percpu_counter_inc(&nr_dentry_unused); 247 dentry_stat.nr_unused++;
248 spin_unlock(&dcache_lru_lock);
144 } 249 }
145} 250}
146 251
252static void __dentry_lru_del(struct dentry *dentry)
253{
254 list_del_init(&dentry->d_lru);
255 dentry->d_sb->s_nr_dentry_unused--;
256 dentry_stat.nr_unused--;
257}
258
147static void dentry_lru_del(struct dentry *dentry) 259static void dentry_lru_del(struct dentry *dentry)
148{ 260{
149 if (!list_empty(&dentry->d_lru)) { 261 if (!list_empty(&dentry->d_lru)) {
150 list_del_init(&dentry->d_lru); 262 spin_lock(&dcache_lru_lock);
151 dentry->d_sb->s_nr_dentry_unused--; 263 __dentry_lru_del(dentry);
152 percpu_counter_dec(&nr_dentry_unused); 264 spin_unlock(&dcache_lru_lock);
153 } 265 }
154} 266}
155 267
156static void dentry_lru_move_tail(struct dentry *dentry) 268static void dentry_lru_move_tail(struct dentry *dentry)
157{ 269{
270 spin_lock(&dcache_lru_lock);
158 if (list_empty(&dentry->d_lru)) { 271 if (list_empty(&dentry->d_lru)) {
159 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 272 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
160 dentry->d_sb->s_nr_dentry_unused++; 273 dentry->d_sb->s_nr_dentry_unused++;
161 percpu_counter_inc(&nr_dentry_unused); 274 dentry_stat.nr_unused++;
162 } else { 275 } else {
163 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); 276 list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
164 } 277 }
278 spin_unlock(&dcache_lru_lock);
165} 279}
166 280
167/** 281/**
@@ -171,22 +285,115 @@ static void dentry_lru_move_tail(struct dentry *dentry)
171 * The dentry must already be unhashed and removed from the LRU. 285 * The dentry must already be unhashed and removed from the LRU.
172 * 286 *
173 * If this is the root of the dentry tree, return NULL. 287 * If this is the root of the dentry tree, return NULL.
288 *
289 * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
290 * d_kill.
174 */ 291 */
175static struct dentry *d_kill(struct dentry *dentry) 292static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
176 __releases(dentry->d_lock) 293 __releases(dentry->d_lock)
177 __releases(dcache_lock) 294 __releases(parent->d_lock)
295 __releases(dentry->d_inode->i_lock)
178{ 296{
179 struct dentry *parent; 297 dentry->d_parent = NULL;
180
181 list_del(&dentry->d_u.d_child); 298 list_del(&dentry->d_u.d_child);
182 /*drops the locks, at that point nobody can reach this dentry */ 299 if (parent)
300 spin_unlock(&parent->d_lock);
183 dentry_iput(dentry); 301 dentry_iput(dentry);
302 /*
303 * dentry_iput drops the locks, at which point nobody (except
304 * transient RCU lookups) can reach this dentry.
305 */
306 d_free(dentry);
307 return parent;
308}
309
310/**
311 * d_drop - drop a dentry
312 * @dentry: dentry to drop
313 *
314 * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
315 * be found through a VFS lookup any more. Note that this is different from
316 * deleting the dentry - d_delete will try to mark the dentry negative if
317 * possible, giving a successful _negative_ lookup, while d_drop will
318 * just make the cache lookup fail.
319 *
320 * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
321 * reason (NFS timeouts or autofs deletes).
322 *
323 * __d_drop requires dentry->d_lock.
324 */
325void __d_drop(struct dentry *dentry)
326{
327 if (!(dentry->d_flags & DCACHE_UNHASHED)) {
328 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) {
329 bit_spin_lock(0,
330 (unsigned long *)&dentry->d_sb->s_anon.first);
331 dentry->d_flags |= DCACHE_UNHASHED;
332 hlist_bl_del_init(&dentry->d_hash);
333 __bit_spin_unlock(0,
334 (unsigned long *)&dentry->d_sb->s_anon.first);
335 } else {
336 struct dcache_hash_bucket *b;
337 b = d_hash(dentry->d_parent, dentry->d_name.hash);
338 spin_lock_bucket(b);
339 /*
340 * We may not actually need to put DCACHE_UNHASHED
341 * manipulations under the hash lock, but follow
342 * the principle of least surprise.
343 */
344 dentry->d_flags |= DCACHE_UNHASHED;
345 hlist_bl_del_rcu(&dentry->d_hash);
346 spin_unlock_bucket(b);
347 dentry_rcuwalk_barrier(dentry);
348 }
349 }
350}
351EXPORT_SYMBOL(__d_drop);
352
353void d_drop(struct dentry *dentry)
354{
355 spin_lock(&dentry->d_lock);
356 __d_drop(dentry);
357 spin_unlock(&dentry->d_lock);
358}
359EXPORT_SYMBOL(d_drop);
360
361/*
362 * Finish off a dentry we've decided to kill.
363 * dentry->d_lock must be held, returns with it unlocked.
364 * If ref is non-zero, then decrement the refcount too.
365 * Returns dentry requiring refcount drop, or NULL if we're done.
366 */
367static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
368 __releases(dentry->d_lock)
369{
370 struct inode *inode;
371 struct dentry *parent;
372
373 inode = dentry->d_inode;
374 if (inode && !spin_trylock(&inode->i_lock)) {
375relock:
376 spin_unlock(&dentry->d_lock);
377 cpu_relax();
378 return dentry; /* try again with same dentry */
379 }
184 if (IS_ROOT(dentry)) 380 if (IS_ROOT(dentry))
185 parent = NULL; 381 parent = NULL;
186 else 382 else
187 parent = dentry->d_parent; 383 parent = dentry->d_parent;
188 d_free(dentry); 384 if (parent && !spin_trylock(&parent->d_lock)) {
189 return parent; 385 if (inode)
386 spin_unlock(&inode->i_lock);
387 goto relock;
388 }
389
390 if (ref)
391 dentry->d_count--;
392 /* if dentry was on the d_lru list delete it from there */
393 dentry_lru_del(dentry);
394 /* if it was on the hash then remove it */
395 __d_drop(dentry);
396 return d_kill(dentry, parent);
190} 397}
191 398
192/* 399/*
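
dentry_kill() must acquire the inode's i_lock and the parent's d_lock while already holding the victim's d_lock, which runs against the documented ordering above (i_lock before d_lock, parent before child). It therefore uses spin_trylock(), and on failure drops what it holds and lets the caller retry the same dentry after cpu_relax(). The same trylock-and-back-off idea with POSIX mutexes (single-threaded here, so the retry path is not actually exercised):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t child  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t parent = PTHREAD_MUTEX_INITIALIZER;

/* Correct order is parent then child; we already hold child. */
static int kill_child_locked(void)
{
        if (pthread_mutex_trylock(&parent) != 0) {
                /* would deadlock if we blocked here: back off and retry */
                pthread_mutex_unlock(&child);
                sched_yield();                  /* stands in for cpu_relax() */
                return -1;                      /* caller retries            */
        }
        printf("both locks held, tearing down child\n");
        pthread_mutex_unlock(&parent);
        pthread_mutex_unlock(&child);
        return 0;
}

int main(void)
{
        for (;;) {
                pthread_mutex_lock(&child);     /* like holding d_lock */
                if (kill_child_locked() == 0)
                        break;
        }
        return 0;
}
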
@@ -214,34 +421,26 @@ static struct dentry *d_kill(struct dentry *dentry)
214 * call the dentry unlink method as well as removing it from the queues and 421 * call the dentry unlink method as well as removing it from the queues and
215 * releasing its resources. If the parent dentries were scheduled for release 422 * releasing its resources. If the parent dentries were scheduled for release
216 * they too may now get deleted. 423 * they too may now get deleted.
217 *
218 * no dcache lock, please.
219 */ 424 */
220
221void dput(struct dentry *dentry) 425void dput(struct dentry *dentry)
222{ 426{
223 if (!dentry) 427 if (!dentry)
224 return; 428 return;
225 429
226repeat: 430repeat:
227 if (atomic_read(&dentry->d_count) == 1) 431 if (dentry->d_count == 1)
228 might_sleep(); 432 might_sleep();
229 if (!atomic_dec_and_lock(&dentry->d_count, &dcache_lock))
230 return;
231
232 spin_lock(&dentry->d_lock); 433 spin_lock(&dentry->d_lock);
233 if (atomic_read(&dentry->d_count)) { 434 BUG_ON(!dentry->d_count);
435 if (dentry->d_count > 1) {
436 dentry->d_count--;
234 spin_unlock(&dentry->d_lock); 437 spin_unlock(&dentry->d_lock);
235 spin_unlock(&dcache_lock);
236 return; 438 return;
237 } 439 }
238 440
239 /* 441 if (dentry->d_flags & DCACHE_OP_DELETE) {
240 * AV: ->d_delete() is _NOT_ allowed to block now.
241 */
242 if (dentry->d_op && dentry->d_op->d_delete) {
243 if (dentry->d_op->d_delete(dentry)) 442 if (dentry->d_op->d_delete(dentry))
244 goto unhash_it; 443 goto kill_it;
245 } 444 }
246 445
247 /* Unreachable? Get rid of it */ 446 /* Unreachable? Get rid of it */
@@ -252,16 +451,12 @@ repeat:
252 dentry->d_flags |= DCACHE_REFERENCED; 451 dentry->d_flags |= DCACHE_REFERENCED;
253 dentry_lru_add(dentry); 452 dentry_lru_add(dentry);
254 453
255 spin_unlock(&dentry->d_lock); 454 dentry->d_count--;
256 spin_unlock(&dcache_lock); 455 spin_unlock(&dentry->d_lock);
257 return; 456 return;
258 457
259unhash_it:
260 __d_drop(dentry);
261kill_it: 458kill_it:
262 /* if dentry was on the d_lru list delete it from there */ 459 dentry = dentry_kill(dentry, 1);
263 dentry_lru_del(dentry);
264 dentry = d_kill(dentry);
265 if (dentry) 460 if (dentry)
266 goto repeat; 461 goto repeat;
267} 462}
@@ -284,9 +479,9 @@ int d_invalidate(struct dentry * dentry)
284 /* 479 /*
285 * If it's already been dropped, return OK. 480 * If it's already been dropped, return OK.
286 */ 481 */
287 spin_lock(&dcache_lock); 482 spin_lock(&dentry->d_lock);
288 if (d_unhashed(dentry)) { 483 if (d_unhashed(dentry)) {
289 spin_unlock(&dcache_lock); 484 spin_unlock(&dentry->d_lock);
290 return 0; 485 return 0;
291 } 486 }
292 /* 487 /*
@@ -294,9 +489,9 @@ int d_invalidate(struct dentry * dentry)
294 * to get rid of unused child entries. 489 * to get rid of unused child entries.
295 */ 490 */
296 if (!list_empty(&dentry->d_subdirs)) { 491 if (!list_empty(&dentry->d_subdirs)) {
297 spin_unlock(&dcache_lock); 492 spin_unlock(&dentry->d_lock);
298 shrink_dcache_parent(dentry); 493 shrink_dcache_parent(dentry);
299 spin_lock(&dcache_lock); 494 spin_lock(&dentry->d_lock);
300 } 495 }
301 496
302 /* 497 /*
@@ -309,35 +504,61 @@ int d_invalidate(struct dentry * dentry)
309 * we might still populate it if it was a 504 * we might still populate it if it was a
310 * working directory or similar). 505 * working directory or similar).
311 */ 506 */
312 spin_lock(&dentry->d_lock); 507 if (dentry->d_count > 1) {
313 if (atomic_read(&dentry->d_count) > 1) {
314 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) { 508 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) {
315 spin_unlock(&dentry->d_lock); 509 spin_unlock(&dentry->d_lock);
316 spin_unlock(&dcache_lock);
317 return -EBUSY; 510 return -EBUSY;
318 } 511 }
319 } 512 }
320 513
321 __d_drop(dentry); 514 __d_drop(dentry);
322 spin_unlock(&dentry->d_lock); 515 spin_unlock(&dentry->d_lock);
323 spin_unlock(&dcache_lock);
324 return 0; 516 return 0;
325} 517}
326EXPORT_SYMBOL(d_invalidate); 518EXPORT_SYMBOL(d_invalidate);
327 519
328/* This should be called _only_ with dcache_lock held */ 520/* This must be called with d_lock held */
329static inline struct dentry * __dget_locked(struct dentry *dentry) 521static inline void __dget_dlock(struct dentry *dentry)
330{ 522{
331 atomic_inc(&dentry->d_count); 523 dentry->d_count++;
332 dentry_lru_del(dentry);
333 return dentry;
334} 524}
335 525
336struct dentry * dget_locked(struct dentry *dentry) 526static inline void __dget(struct dentry *dentry)
337{ 527{
338 return __dget_locked(dentry); 528 spin_lock(&dentry->d_lock);
529 __dget_dlock(dentry);
530 spin_unlock(&dentry->d_lock);
531}
532
533struct dentry *dget_parent(struct dentry *dentry)
534{
535 struct dentry *ret;
536
537repeat:
538 /*
539 * Don't need rcu_dereference because we re-check it was correct under
540 * the lock.
541 */
542 rcu_read_lock();
543 ret = dentry->d_parent;
544 if (!ret) {
545 rcu_read_unlock();
546 goto out;
547 }
548 spin_lock(&ret->d_lock);
549 if (unlikely(ret != dentry->d_parent)) {
550 spin_unlock(&ret->d_lock);
551 rcu_read_unlock();
552 goto repeat;
553 }
554 rcu_read_unlock();
555 BUG_ON(!ret->d_count);
556 ret->d_count++;
557 spin_unlock(&ret->d_lock);
558out:
559 return ret;
339} 560}
340EXPORT_SYMBOL(dget_locked); 561EXPORT_SYMBOL(dget_parent);
341 562
342/** 563/**
343 * d_find_alias - grab a hashed alias of inode 564 * d_find_alias - grab a hashed alias of inode
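
dget_parent() is new in this form: with dcache_lock gone, d_parent can change underneath us during a rename, so the function samples d_parent inside rcu_read_lock(), takes the candidate's d_lock, and only trusts the pointer if it still equals dentry->d_parent once the lock is held, retrying otherwise. A compact C11 model of that read/lock/re-check loop, with an atomic pointer standing in for the RCU-protected d_parent field:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct node {
        pthread_mutex_t lock;
        int count;
};

struct child {
        _Atomic(struct node *) parent;   /* may be changed by a "rename" */
};

static struct node *get_parent(struct child *c)
{
        struct node *p;

        for (;;) {
                p = atomic_load(&c->parent);        /* racy sample        */
                pthread_mutex_lock(&p->lock);
                if (p == atomic_load(&c->parent))   /* still the parent?  */
                        break;
                pthread_mutex_unlock(&p->lock);     /* changed: try again */
        }
        p->count++;                                 /* take the reference */
        pthread_mutex_unlock(&p->lock);
        return p;
}

int main(void)
{
        struct node n;
        struct child c;

        pthread_mutex_init(&n.lock, NULL);
        n.count = 1;
        atomic_init(&c.parent, &n);

        struct node *p = get_parent(&c);
        printf("parent refcount now %d\n", p->count);
        return 0;
}
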
@@ -355,42 +576,51 @@ EXPORT_SYMBOL(dget_locked);
355 * any other hashed alias over that one unless @want_discon is set, 576 * any other hashed alias over that one unless @want_discon is set,
356 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. 577 * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
357 */ 578 */
358 579static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
359static struct dentry * __d_find_alias(struct inode *inode, int want_discon)
360{ 580{
361 struct list_head *head, *next, *tmp; 581 struct dentry *alias, *discon_alias;
362 struct dentry *alias, *discon_alias=NULL;
363 582
364 head = &inode->i_dentry; 583again:
365 next = inode->i_dentry.next; 584 discon_alias = NULL;
366 while (next != head) { 585 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
367 tmp = next; 586 spin_lock(&alias->d_lock);
368 next = tmp->next;
369 prefetch(next);
370 alias = list_entry(tmp, struct dentry, d_alias);
371 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 587 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
372 if (IS_ROOT(alias) && 588 if (IS_ROOT(alias) &&
373 (alias->d_flags & DCACHE_DISCONNECTED)) 589 (alias->d_flags & DCACHE_DISCONNECTED)) {
374 discon_alias = alias; 590 discon_alias = alias;
375 else if (!want_discon) { 591 } else if (!want_discon) {
376 __dget_locked(alias); 592 __dget_dlock(alias);
593 spin_unlock(&alias->d_lock);
594 return alias;
595 }
596 }
597 spin_unlock(&alias->d_lock);
598 }
599 if (discon_alias) {
600 alias = discon_alias;
601 spin_lock(&alias->d_lock);
602 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
603 if (IS_ROOT(alias) &&
604 (alias->d_flags & DCACHE_DISCONNECTED)) {
605 __dget_dlock(alias);
606 spin_unlock(&alias->d_lock);
377 return alias; 607 return alias;
378 } 608 }
379 } 609 }
610 spin_unlock(&alias->d_lock);
611 goto again;
380 } 612 }
381 if (discon_alias) 613 return NULL;
382 __dget_locked(discon_alias);
383 return discon_alias;
384} 614}
385 615
386struct dentry * d_find_alias(struct inode *inode) 616struct dentry *d_find_alias(struct inode *inode)
387{ 617{
388 struct dentry *de = NULL; 618 struct dentry *de = NULL;
389 619
390 if (!list_empty(&inode->i_dentry)) { 620 if (!list_empty(&inode->i_dentry)) {
391 spin_lock(&dcache_lock); 621 spin_lock(&inode->i_lock);
392 de = __d_find_alias(inode, 0); 622 de = __d_find_alias(inode, 0);
393 spin_unlock(&dcache_lock); 623 spin_unlock(&inode->i_lock);
394 } 624 }
395 return de; 625 return de;
396} 626}
@@ -404,54 +634,61 @@ void d_prune_aliases(struct inode *inode)
404{ 634{
405 struct dentry *dentry; 635 struct dentry *dentry;
406restart: 636restart:
407 spin_lock(&dcache_lock); 637 spin_lock(&inode->i_lock);
408 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 638 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
409 spin_lock(&dentry->d_lock); 639 spin_lock(&dentry->d_lock);
410 if (!atomic_read(&dentry->d_count)) { 640 if (!dentry->d_count) {
411 __dget_locked(dentry); 641 __dget_dlock(dentry);
412 __d_drop(dentry); 642 __d_drop(dentry);
413 spin_unlock(&dentry->d_lock); 643 spin_unlock(&dentry->d_lock);
414 spin_unlock(&dcache_lock); 644 spin_unlock(&inode->i_lock);
415 dput(dentry); 645 dput(dentry);
416 goto restart; 646 goto restart;
417 } 647 }
418 spin_unlock(&dentry->d_lock); 648 spin_unlock(&dentry->d_lock);
419 } 649 }
420 spin_unlock(&dcache_lock); 650 spin_unlock(&inode->i_lock);
421} 651}
422EXPORT_SYMBOL(d_prune_aliases); 652EXPORT_SYMBOL(d_prune_aliases);
423 653
424/* 654/*
425 * Throw away a dentry - free the inode, dput the parent. This requires that 655 * Try to throw away a dentry - free the inode, dput the parent.
426 * the LRU list has already been removed. 656 * Requires dentry->d_lock is held, and dentry->d_count == 0.
657 * Releases dentry->d_lock.
427 * 658 *
 428 * Try to prune ancestors as well. This is necessary to prevent 659 * This may fail if locks cannot be acquired; no problem, just try again.
429 * quadratic behavior of shrink_dcache_parent(), but is also expected
430 * to be beneficial in reducing dentry cache fragmentation.
431 */ 660 */
432static void prune_one_dentry(struct dentry * dentry) 661static void try_prune_one_dentry(struct dentry *dentry)
433 __releases(dentry->d_lock) 662 __releases(dentry->d_lock)
434 __releases(dcache_lock)
435 __acquires(dcache_lock)
436{ 663{
437 __d_drop(dentry); 664 struct dentry *parent;
438 dentry = d_kill(dentry);
439 665
666 parent = dentry_kill(dentry, 0);
440 /* 667 /*
441 * Prune ancestors. Locking is simpler than in dput(), 668 * If dentry_kill returns NULL, we have nothing more to do.
442 * because dcache_lock needs to be taken anyway. 669 * if it returns the same dentry, trylocks failed. In either
670 * case, just loop again.
671 *
672 * Otherwise, we need to prune ancestors too. This is necessary
673 * to prevent quadratic behavior of shrink_dcache_parent(), but
674 * is also expected to be beneficial in reducing dentry cache
675 * fragmentation.
443 */ 676 */
444 spin_lock(&dcache_lock); 677 if (!parent)
678 return;
679 if (parent == dentry)
680 return;
681
682 /* Prune ancestors. */
683 dentry = parent;
445 while (dentry) { 684 while (dentry) {
446 if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) 685 spin_lock(&dentry->d_lock);
686 if (dentry->d_count > 1) {
687 dentry->d_count--;
688 spin_unlock(&dentry->d_lock);
447 return; 689 return;
448 690 }
449 if (dentry->d_op && dentry->d_op->d_delete) 691 dentry = dentry_kill(dentry, 1);
450 dentry->d_op->d_delete(dentry);
451 dentry_lru_del(dentry);
452 __d_drop(dentry);
453 dentry = d_kill(dentry);
454 spin_lock(&dcache_lock);
455 } 692 }
456} 693}
457 694
@@ -459,24 +696,35 @@ static void shrink_dentry_list(struct list_head *list)
459{ 696{
460 struct dentry *dentry; 697 struct dentry *dentry;
461 698
462 while (!list_empty(list)) { 699 rcu_read_lock();
463 dentry = list_entry(list->prev, struct dentry, d_lru); 700 for (;;) {
464 dentry_lru_del(dentry); 701 dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
702 if (&dentry->d_lru == list)
703 break; /* empty */
704 spin_lock(&dentry->d_lock);
705 if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
706 spin_unlock(&dentry->d_lock);
707 continue;
708 }
465 709
466 /* 710 /*
467 * We found an inuse dentry which was not removed from 711 * We found an inuse dentry which was not removed from
468 * the LRU because of laziness during lookup. Do not free 712 * the LRU because of laziness during lookup. Do not free
469 * it - just keep it off the LRU list. 713 * it - just keep it off the LRU list.
470 */ 714 */
471 spin_lock(&dentry->d_lock); 715 if (dentry->d_count) {
472 if (atomic_read(&dentry->d_count)) { 716 dentry_lru_del(dentry);
473 spin_unlock(&dentry->d_lock); 717 spin_unlock(&dentry->d_lock);
474 continue; 718 continue;
475 } 719 }
476 prune_one_dentry(dentry); 720
477 /* dentry->d_lock was dropped in prune_one_dentry() */ 721 rcu_read_unlock();
478 cond_resched_lock(&dcache_lock); 722
723 try_prune_one_dentry(dentry);
724
725 rcu_read_lock();
479 } 726 }
727 rcu_read_unlock();
480} 728}
481 729
482/** 730/**
@@ -495,42 +743,44 @@ static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
495 LIST_HEAD(tmp); 743 LIST_HEAD(tmp);
496 int cnt = *count; 744 int cnt = *count;
497 745
498 spin_lock(&dcache_lock); 746relock:
747 spin_lock(&dcache_lru_lock);
499 while (!list_empty(&sb->s_dentry_lru)) { 748 while (!list_empty(&sb->s_dentry_lru)) {
500 dentry = list_entry(sb->s_dentry_lru.prev, 749 dentry = list_entry(sb->s_dentry_lru.prev,
501 struct dentry, d_lru); 750 struct dentry, d_lru);
502 BUG_ON(dentry->d_sb != sb); 751 BUG_ON(dentry->d_sb != sb);
503 752
753 if (!spin_trylock(&dentry->d_lock)) {
754 spin_unlock(&dcache_lru_lock);
755 cpu_relax();
756 goto relock;
757 }
758
504 /* 759 /*
505 * If we are honouring the DCACHE_REFERENCED flag and the 760 * If we are honouring the DCACHE_REFERENCED flag and the
506 * dentry has this flag set, don't free it. Clear the flag 761 * dentry has this flag set, don't free it. Clear the flag
507 * and put it back on the LRU. 762 * and put it back on the LRU.
508 */ 763 */
509 if (flags & DCACHE_REFERENCED) { 764 if (flags & DCACHE_REFERENCED &&
510 spin_lock(&dentry->d_lock); 765 dentry->d_flags & DCACHE_REFERENCED) {
511 if (dentry->d_flags & DCACHE_REFERENCED) { 766 dentry->d_flags &= ~DCACHE_REFERENCED;
512 dentry->d_flags &= ~DCACHE_REFERENCED; 767 list_move(&dentry->d_lru, &referenced);
513 list_move(&dentry->d_lru, &referenced);
514 spin_unlock(&dentry->d_lock);
515 cond_resched_lock(&dcache_lock);
516 continue;
517 }
518 spin_unlock(&dentry->d_lock); 768 spin_unlock(&dentry->d_lock);
769 } else {
770 list_move_tail(&dentry->d_lru, &tmp);
771 spin_unlock(&dentry->d_lock);
772 if (!--cnt)
773 break;
519 } 774 }
520 775 cond_resched_lock(&dcache_lru_lock);
521 list_move_tail(&dentry->d_lru, &tmp);
522 if (!--cnt)
523 break;
524 cond_resched_lock(&dcache_lock);
525 } 776 }
526
527 *count = cnt;
528 shrink_dentry_list(&tmp);
529
530 if (!list_empty(&referenced)) 777 if (!list_empty(&referenced))
531 list_splice(&referenced, &sb->s_dentry_lru); 778 list_splice(&referenced, &sb->s_dentry_lru);
532 spin_unlock(&dcache_lock); 779 spin_unlock(&dcache_lru_lock);
533 780
781 shrink_dentry_list(&tmp);
782
783 *count = cnt;
534} 784}
535 785
536/** 786/**
@@ -546,13 +796,12 @@ static void prune_dcache(int count)
546{ 796{
547 struct super_block *sb, *p = NULL; 797 struct super_block *sb, *p = NULL;
548 int w_count; 798 int w_count;
549 int unused = percpu_counter_sum_positive(&nr_dentry_unused); 799 int unused = dentry_stat.nr_unused;
550 int prune_ratio; 800 int prune_ratio;
551 int pruned; 801 int pruned;
552 802
553 if (unused == 0 || count == 0) 803 if (unused == 0 || count == 0)
554 return; 804 return;
555 spin_lock(&dcache_lock);
556 if (count >= unused) 805 if (count >= unused)
557 prune_ratio = 1; 806 prune_ratio = 1;
558 else 807 else
@@ -589,11 +838,9 @@ static void prune_dcache(int count)
589 if (down_read_trylock(&sb->s_umount)) { 838 if (down_read_trylock(&sb->s_umount)) {
590 if ((sb->s_root != NULL) && 839 if ((sb->s_root != NULL) &&
591 (!list_empty(&sb->s_dentry_lru))) { 840 (!list_empty(&sb->s_dentry_lru))) {
592 spin_unlock(&dcache_lock);
593 __shrink_dcache_sb(sb, &w_count, 841 __shrink_dcache_sb(sb, &w_count,
594 DCACHE_REFERENCED); 842 DCACHE_REFERENCED);
595 pruned -= w_count; 843 pruned -= w_count;
596 spin_lock(&dcache_lock);
597 } 844 }
598 up_read(&sb->s_umount); 845 up_read(&sb->s_umount);
599 } 846 }
@@ -609,7 +856,6 @@ static void prune_dcache(int count)
609 if (p) 856 if (p)
610 __put_super(p); 857 __put_super(p);
611 spin_unlock(&sb_lock); 858 spin_unlock(&sb_lock);
612 spin_unlock(&dcache_lock);
613} 859}
614 860
615/** 861/**
@@ -623,12 +869,14 @@ void shrink_dcache_sb(struct super_block *sb)
623{ 869{
624 LIST_HEAD(tmp); 870 LIST_HEAD(tmp);
625 871
626 spin_lock(&dcache_lock); 872 spin_lock(&dcache_lru_lock);
627 while (!list_empty(&sb->s_dentry_lru)) { 873 while (!list_empty(&sb->s_dentry_lru)) {
628 list_splice_init(&sb->s_dentry_lru, &tmp); 874 list_splice_init(&sb->s_dentry_lru, &tmp);
875 spin_unlock(&dcache_lru_lock);
629 shrink_dentry_list(&tmp); 876 shrink_dentry_list(&tmp);
877 spin_lock(&dcache_lru_lock);
630 } 878 }
631 spin_unlock(&dcache_lock); 879 spin_unlock(&dcache_lru_lock);
632} 880}
633EXPORT_SYMBOL(shrink_dcache_sb); 881EXPORT_SYMBOL(shrink_dcache_sb);
634 882
@@ -645,10 +893,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
645 BUG_ON(!IS_ROOT(dentry)); 893 BUG_ON(!IS_ROOT(dentry));
646 894
647 /* detach this root from the system */ 895 /* detach this root from the system */
648 spin_lock(&dcache_lock); 896 spin_lock(&dentry->d_lock);
649 dentry_lru_del(dentry); 897 dentry_lru_del(dentry);
650 __d_drop(dentry); 898 __d_drop(dentry);
651 spin_unlock(&dcache_lock); 899 spin_unlock(&dentry->d_lock);
652 900
653 for (;;) { 901 for (;;) {
654 /* descend to the first leaf in the current subtree */ 902 /* descend to the first leaf in the current subtree */
@@ -657,14 +905,16 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
657 905
658 /* this is a branch with children - detach all of them 906 /* this is a branch with children - detach all of them
659 * from the system in one go */ 907 * from the system in one go */
660 spin_lock(&dcache_lock); 908 spin_lock(&dentry->d_lock);
661 list_for_each_entry(loop, &dentry->d_subdirs, 909 list_for_each_entry(loop, &dentry->d_subdirs,
662 d_u.d_child) { 910 d_u.d_child) {
911 spin_lock_nested(&loop->d_lock,
912 DENTRY_D_LOCK_NESTED);
663 dentry_lru_del(loop); 913 dentry_lru_del(loop);
664 __d_drop(loop); 914 __d_drop(loop);
665 cond_resched_lock(&dcache_lock); 915 spin_unlock(&loop->d_lock);
666 } 916 }
667 spin_unlock(&dcache_lock); 917 spin_unlock(&dentry->d_lock);
668 918
669 /* move to the first child */ 919 /* move to the first child */
670 dentry = list_entry(dentry->d_subdirs.next, 920 dentry = list_entry(dentry->d_subdirs.next,
@@ -676,7 +926,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
676 do { 926 do {
677 struct inode *inode; 927 struct inode *inode;
678 928
679 if (atomic_read(&dentry->d_count) != 0) { 929 if (dentry->d_count != 0) {
680 printk(KERN_ERR 930 printk(KERN_ERR
681 "BUG: Dentry %p{i=%lx,n=%s}" 931 "BUG: Dentry %p{i=%lx,n=%s}"
682 " still in use (%d)" 932 " still in use (%d)"
@@ -685,20 +935,23 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
685 dentry->d_inode ? 935 dentry->d_inode ?
686 dentry->d_inode->i_ino : 0UL, 936 dentry->d_inode->i_ino : 0UL,
687 dentry->d_name.name, 937 dentry->d_name.name,
688 atomic_read(&dentry->d_count), 938 dentry->d_count,
689 dentry->d_sb->s_type->name, 939 dentry->d_sb->s_type->name,
690 dentry->d_sb->s_id); 940 dentry->d_sb->s_id);
691 BUG(); 941 BUG();
692 } 942 }
693 943
694 if (IS_ROOT(dentry)) 944 if (IS_ROOT(dentry)) {
695 parent = NULL; 945 parent = NULL;
696 else { 946 list_del(&dentry->d_u.d_child);
947 } else {
697 parent = dentry->d_parent; 948 parent = dentry->d_parent;
698 atomic_dec(&parent->d_count); 949 spin_lock(&parent->d_lock);
950 parent->d_count--;
951 list_del(&dentry->d_u.d_child);
952 spin_unlock(&parent->d_lock);
699 } 953 }
700 954
701 list_del(&dentry->d_u.d_child);
702 detached++; 955 detached++;
703 956
704 inode = dentry->d_inode; 957 inode = dentry->d_inode;
@@ -728,8 +981,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
728 981
729/* 982/*
730 * destroy the dentries attached to a superblock on unmounting 983 * destroy the dentries attached to a superblock on unmounting
731 * - we don't need to use dentry->d_lock, and only need dcache_lock when 984 * - we don't need to use dentry->d_lock because:
732 * removing the dentry from the system lists and hashes because:
733 * - the superblock is detached from all mountings and open files, so the 985 * - the superblock is detached from all mountings and open files, so the
734 * dentry trees will not be rearranged by the VFS 986 * dentry trees will not be rearranged by the VFS
735 * - s_umount is write-locked, so the memory pressure shrinker will ignore 987 * - s_umount is write-locked, so the memory pressure shrinker will ignore
@@ -746,11 +998,13 @@ void shrink_dcache_for_umount(struct super_block *sb)
746 998
747 dentry = sb->s_root; 999 dentry = sb->s_root;
748 sb->s_root = NULL; 1000 sb->s_root = NULL;
749 atomic_dec(&dentry->d_count); 1001 spin_lock(&dentry->d_lock);
1002 dentry->d_count--;
1003 spin_unlock(&dentry->d_lock);
750 shrink_dcache_for_umount_subtree(dentry); 1004 shrink_dcache_for_umount_subtree(dentry);
751 1005
752 while (!hlist_empty(&sb->s_anon)) { 1006 while (!hlist_bl_empty(&sb->s_anon)) {
753 dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); 1007 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
754 shrink_dcache_for_umount_subtree(dentry); 1008 shrink_dcache_for_umount_subtree(dentry);
755 } 1009 }
756} 1010}
@@ -768,15 +1022,20 @@ void shrink_dcache_for_umount(struct super_block *sb)
768 * Return true if the parent or its subdirectories contain 1022 * Return true if the parent or its subdirectories contain
769 * a mount point 1023 * a mount point
770 */ 1024 */
771
772int have_submounts(struct dentry *parent) 1025int have_submounts(struct dentry *parent)
773{ 1026{
774 struct dentry *this_parent = parent; 1027 struct dentry *this_parent;
775 struct list_head *next; 1028 struct list_head *next;
1029 unsigned seq;
1030 int locked = 0;
1031
1032 seq = read_seqbegin(&rename_lock);
1033again:
1034 this_parent = parent;
776 1035
777 spin_lock(&dcache_lock);
778 if (d_mountpoint(parent)) 1036 if (d_mountpoint(parent))
779 goto positive; 1037 goto positive;
1038 spin_lock(&this_parent->d_lock);
780repeat: 1039repeat:
781 next = this_parent->d_subdirs.next; 1040 next = this_parent->d_subdirs.next;
782resume: 1041resume:
@@ -784,27 +1043,65 @@ resume:
784 struct list_head *tmp = next; 1043 struct list_head *tmp = next;
785 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1044 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
786 next = tmp->next; 1045 next = tmp->next;
1046
1047 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
787 /* Have we found a mount point ? */ 1048 /* Have we found a mount point ? */
788 if (d_mountpoint(dentry)) 1049 if (d_mountpoint(dentry)) {
1050 spin_unlock(&dentry->d_lock);
1051 spin_unlock(&this_parent->d_lock);
789 goto positive; 1052 goto positive;
1053 }
790 if (!list_empty(&dentry->d_subdirs)) { 1054 if (!list_empty(&dentry->d_subdirs)) {
1055 spin_unlock(&this_parent->d_lock);
1056 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
791 this_parent = dentry; 1057 this_parent = dentry;
1058 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
792 goto repeat; 1059 goto repeat;
793 } 1060 }
1061 spin_unlock(&dentry->d_lock);
794 } 1062 }
795 /* 1063 /*
796 * All done at this level ... ascend and resume the search. 1064 * All done at this level ... ascend and resume the search.
797 */ 1065 */
798 if (this_parent != parent) { 1066 if (this_parent != parent) {
799 next = this_parent->d_u.d_child.next; 1067 struct dentry *tmp;
800 this_parent = this_parent->d_parent; 1068 struct dentry *child;
1069
1070 tmp = this_parent->d_parent;
1071 rcu_read_lock();
1072 spin_unlock(&this_parent->d_lock);
1073 child = this_parent;
1074 this_parent = tmp;
1075 spin_lock(&this_parent->d_lock);
1076 /* might go back up the wrong parent if we have had a rename
1077 * or deletion */
1078 if (this_parent != child->d_parent ||
1079 (!locked && read_seqretry(&rename_lock, seq))) {
1080 spin_unlock(&this_parent->d_lock);
1081 rcu_read_unlock();
1082 goto rename_retry;
1083 }
1084 rcu_read_unlock();
1085 next = child->d_u.d_child.next;
801 goto resume; 1086 goto resume;
802 } 1087 }
803 spin_unlock(&dcache_lock); 1088 spin_unlock(&this_parent->d_lock);
1089 if (!locked && read_seqretry(&rename_lock, seq))
1090 goto rename_retry;
1091 if (locked)
1092 write_sequnlock(&rename_lock);
804 return 0; /* No mount points found in tree */ 1093 return 0; /* No mount points found in tree */
805positive: 1094positive:
806 spin_unlock(&dcache_lock); 1095 if (!locked && read_seqretry(&rename_lock, seq))
1096 goto rename_retry;
1097 if (locked)
1098 write_sequnlock(&rename_lock);
807 return 1; 1099 return 1;
1100
1101rename_retry:
1102 locked = 1;
1103 write_seqlock(&rename_lock);
1104 goto again;
808} 1105}
809EXPORT_SYMBOL(have_submounts); 1106EXPORT_SYMBOL(have_submounts);
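
have_submounts() above walks the tree under the rename_lock sequence counter: the first pass is lockless, and only if a concurrent rename invalidated it does the walk retry while holding the write side, which guarantees termination. Below is a rough user-space sketch of that read-then-escalate idiom built on a hand-rolled C11 sequence counter; it is a simplified stand-in, not the kernel's seqlock API.

/* Illustrative sketch (not kernel code): read shared data under a sequence
 * counter; if a writer raced with the read, redo it holding the writer lock
 * so the second pass cannot be invalidated. */
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_uint tree_seq;            /* even: stable, odd: writer active */
static pthread_mutex_t tree_write_lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic int tree_data;           /* stand-in for the dentry tree */

static void tree_update(int v)
{
        pthread_mutex_lock(&tree_write_lock);
        atomic_fetch_add(&tree_seq, 1); /* readers now see an odd count */
        tree_data = v;
        atomic_fetch_add(&tree_seq, 1); /* even again: data stable */
        pthread_mutex_unlock(&tree_write_lock);
}

static int tree_walk(void)
{
        unsigned seq;
        int locked = 0, result;
again:
        while ((seq = atomic_load(&tree_seq)) & 1)
                ;                       /* writer in progress, wait it out */
        result = tree_data;             /* the (possibly racy) walk */
        if (!locked && atomic_load(&tree_seq) != seq) {
                /* a rename-style update raced with us: escalate and redo,
                 * which guarantees forward progress */
                locked = 1;
                pthread_mutex_lock(&tree_write_lock);
                goto again;
        }
        if (locked)
                pthread_mutex_unlock(&tree_write_lock);
        return result;
}

int main(void)
{
        tree_update(42);
        printf("%d\n", tree_walk());    /* prints 42 */
        return 0;
}
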
810 1107
@@ -824,11 +1121,16 @@ EXPORT_SYMBOL(have_submounts);
824 */ 1121 */
825static int select_parent(struct dentry * parent) 1122static int select_parent(struct dentry * parent)
826{ 1123{
827 struct dentry *this_parent = parent; 1124 struct dentry *this_parent;
828 struct list_head *next; 1125 struct list_head *next;
1126 unsigned seq;
829 int found = 0; 1127 int found = 0;
1128 int locked = 0;
830 1129
831 spin_lock(&dcache_lock); 1130 seq = read_seqbegin(&rename_lock);
1131again:
1132 this_parent = parent;
1133 spin_lock(&this_parent->d_lock);
832repeat: 1134repeat:
833 next = this_parent->d_subdirs.next; 1135 next = this_parent->d_subdirs.next;
834resume: 1136resume:
@@ -837,11 +1139,13 @@ resume:
837 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 1139 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
838 next = tmp->next; 1140 next = tmp->next;
839 1141
1142 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1143
840 /* 1144 /*
841 * move only zero ref count dentries to the end 1145 * move only zero ref count dentries to the end
842 * of the unused list for prune_dcache 1146 * of the unused list for prune_dcache
843 */ 1147 */
844 if (!atomic_read(&dentry->d_count)) { 1148 if (!dentry->d_count) {
845 dentry_lru_move_tail(dentry); 1149 dentry_lru_move_tail(dentry);
846 found++; 1150 found++;
847 } else { 1151 } else {
@@ -853,28 +1157,63 @@ resume:
853 * ensures forward progress). We'll be coming back to find 1157 * ensures forward progress). We'll be coming back to find
854 * the rest. 1158 * the rest.
855 */ 1159 */
856 if (found && need_resched()) 1160 if (found && need_resched()) {
1161 spin_unlock(&dentry->d_lock);
857 goto out; 1162 goto out;
1163 }
858 1164
859 /* 1165 /*
860 * Descend a level if the d_subdirs list is non-empty. 1166 * Descend a level if the d_subdirs list is non-empty.
861 */ 1167 */
862 if (!list_empty(&dentry->d_subdirs)) { 1168 if (!list_empty(&dentry->d_subdirs)) {
1169 spin_unlock(&this_parent->d_lock);
1170 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
863 this_parent = dentry; 1171 this_parent = dentry;
1172 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
864 goto repeat; 1173 goto repeat;
865 } 1174 }
1175
1176 spin_unlock(&dentry->d_lock);
866 } 1177 }
867 /* 1178 /*
868 * All done at this level ... ascend and resume the search. 1179 * All done at this level ... ascend and resume the search.
869 */ 1180 */
870 if (this_parent != parent) { 1181 if (this_parent != parent) {
871 next = this_parent->d_u.d_child.next; 1182 struct dentry *tmp;
872 this_parent = this_parent->d_parent; 1183 struct dentry *child;
1184
1185 tmp = this_parent->d_parent;
1186 rcu_read_lock();
1187 spin_unlock(&this_parent->d_lock);
1188 child = this_parent;
1189 this_parent = tmp;
1190 spin_lock(&this_parent->d_lock);
1191 /* might go back up the wrong parent if we have had a rename
1192 * or deletion */
1193 if (this_parent != child->d_parent ||
1194 (!locked && read_seqretry(&rename_lock, seq))) {
1195 spin_unlock(&this_parent->d_lock);
1196 rcu_read_unlock();
1197 goto rename_retry;
1198 }
1199 rcu_read_unlock();
1200 next = child->d_u.d_child.next;
873 goto resume; 1201 goto resume;
874 } 1202 }
875out: 1203out:
876 spin_unlock(&dcache_lock); 1204 spin_unlock(&this_parent->d_lock);
1205 if (!locked && read_seqretry(&rename_lock, seq))
1206 goto rename_retry;
1207 if (locked)
1208 write_sequnlock(&rename_lock);
877 return found; 1209 return found;
1210
1211rename_retry:
1212 if (found)
1213 return found;
1214 locked = 1;
1215 write_seqlock(&rename_lock);
1216 goto again;
878} 1217}
879 1218
880/** 1219/**
@@ -908,16 +1247,13 @@ EXPORT_SYMBOL(shrink_dcache_parent);
908 */ 1247 */
909static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) 1248static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
910{ 1249{
911 int nr_unused;
912
913 if (nr) { 1250 if (nr) {
914 if (!(gfp_mask & __GFP_FS)) 1251 if (!(gfp_mask & __GFP_FS))
915 return -1; 1252 return -1;
916 prune_dcache(nr); 1253 prune_dcache(nr);
917 } 1254 }
918 1255
919 nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); 1256 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
920 return (nr_unused / 100) * sysctl_vfs_cache_pressure;
921} 1257}
922 1258
923static struct shrinker dcache_shrinker = { 1259static struct shrinker dcache_shrinker = {
@@ -960,38 +1296,52 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
960 memcpy(dname, name->name, name->len); 1296 memcpy(dname, name->name, name->len);
961 dname[name->len] = 0; 1297 dname[name->len] = 0;
962 1298
963 atomic_set(&dentry->d_count, 1); 1299 dentry->d_count = 1;
964 dentry->d_flags = DCACHE_UNHASHED; 1300 dentry->d_flags = DCACHE_UNHASHED;
965 spin_lock_init(&dentry->d_lock); 1301 spin_lock_init(&dentry->d_lock);
1302 seqcount_init(&dentry->d_seq);
966 dentry->d_inode = NULL; 1303 dentry->d_inode = NULL;
967 dentry->d_parent = NULL; 1304 dentry->d_parent = NULL;
968 dentry->d_sb = NULL; 1305 dentry->d_sb = NULL;
969 dentry->d_op = NULL; 1306 dentry->d_op = NULL;
970 dentry->d_fsdata = NULL; 1307 dentry->d_fsdata = NULL;
971 dentry->d_mounted = 0; 1308 INIT_HLIST_BL_NODE(&dentry->d_hash);
972 INIT_HLIST_NODE(&dentry->d_hash);
973 INIT_LIST_HEAD(&dentry->d_lru); 1309 INIT_LIST_HEAD(&dentry->d_lru);
974 INIT_LIST_HEAD(&dentry->d_subdirs); 1310 INIT_LIST_HEAD(&dentry->d_subdirs);
975 INIT_LIST_HEAD(&dentry->d_alias); 1311 INIT_LIST_HEAD(&dentry->d_alias);
1312 INIT_LIST_HEAD(&dentry->d_u.d_child);
976 1313
977 if (parent) { 1314 if (parent) {
978 dentry->d_parent = dget(parent); 1315 spin_lock(&parent->d_lock);
1316 /*
1317 * don't need child lock because it is not subject
1318 * to concurrency here
1319 */
1320 __dget_dlock(parent);
1321 dentry->d_parent = parent;
979 dentry->d_sb = parent->d_sb; 1322 dentry->d_sb = parent->d_sb;
980 } else {
981 INIT_LIST_HEAD(&dentry->d_u.d_child);
982 }
983
984 spin_lock(&dcache_lock);
985 if (parent)
986 list_add(&dentry->d_u.d_child, &parent->d_subdirs); 1323 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
987 spin_unlock(&dcache_lock); 1324 spin_unlock(&parent->d_lock);
1325 }
988 1326
989 percpu_counter_inc(&nr_dentry); 1327 this_cpu_inc(nr_dentry);
990 1328
991 return dentry; 1329 return dentry;
992} 1330}
993EXPORT_SYMBOL(d_alloc); 1331EXPORT_SYMBOL(d_alloc);
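
d_alloc() now accounts new dentries with this_cpu_inc(nr_dentry) rather than a shared counter taken under a global lock; the per-CPU contributions are only folded together when the statistic is actually read. A user-space approximation with padded per-slot counters is sketched below (invented names, assumed one-slot-per-CPU mapping).

/* Illustrative sketch (not kernel code): increment a private, padded slot on
 * the hot path; sum all slots only when the value is read. */
#include <stdatomic.h>
#include <stdio.h>

#define NR_SLOTS 64                     /* assumed: one slot per CPU/thread */

static struct counter_slot {
        atomic_long count;
        char pad[64];                   /* crude padding against false sharing */
} slots[NR_SLOTS];

static void counter_inc(unsigned cpu)
{
        /* relaxed ordering: this is a statistic, not a synchronisation point */
        atomic_fetch_add_explicit(&slots[cpu % NR_SLOTS].count, 1,
                                  memory_order_relaxed);
}

static long counter_sum(void)
{
        long sum = 0;
        for (int i = 0; i < NR_SLOTS; i++)
                sum += atomic_load_explicit(&slots[i].count,
                                            memory_order_relaxed);
        return sum;
}

int main(void)
{
        counter_inc(0);
        counter_inc(1);
        printf("%ld\n", counter_sum()); /* prints 2 */
        return 0;
}
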
994 1332
1333struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1334{
1335 struct dentry *dentry = d_alloc(NULL, name);
1336 if (dentry) {
1337 dentry->d_sb = sb;
1338 dentry->d_parent = dentry;
1339 dentry->d_flags |= DCACHE_DISCONNECTED;
1340 }
1341 return dentry;
1342}
1343EXPORT_SYMBOL(d_alloc_pseudo);
1344
995struct dentry *d_alloc_name(struct dentry *parent, const char *name) 1345struct dentry *d_alloc_name(struct dentry *parent, const char *name)
996{ 1346{
997 struct qstr q; 1347 struct qstr q;
@@ -1003,12 +1353,36 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
1003} 1353}
1004EXPORT_SYMBOL(d_alloc_name); 1354EXPORT_SYMBOL(d_alloc_name);
1005 1355
1006/* the caller must hold dcache_lock */ 1356void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
1357{
1358 BUG_ON(dentry->d_op);
1359 BUG_ON(dentry->d_flags & (DCACHE_OP_HASH |
1360 DCACHE_OP_COMPARE |
1361 DCACHE_OP_REVALIDATE |
1362 DCACHE_OP_DELETE ));
1363 dentry->d_op = op;
1364 if (!op)
1365 return;
1366 if (op->d_hash)
1367 dentry->d_flags |= DCACHE_OP_HASH;
1368 if (op->d_compare)
1369 dentry->d_flags |= DCACHE_OP_COMPARE;
1370 if (op->d_revalidate)
1371 dentry->d_flags |= DCACHE_OP_REVALIDATE;
1372 if (op->d_delete)
1373 dentry->d_flags |= DCACHE_OP_DELETE;
1374
1375}
1376EXPORT_SYMBOL(d_set_d_op);
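
d_set_d_op() mirrors which dentry_operations callbacks exist into DCACHE_OP_* bits, so hot paths such as the RCU-walk lookup added later in this diff can test a flags word they already hold instead of dereferencing d_op. A small stand-alone sketch of that capability-bit caching pattern follows; the types and names are invented for illustration.

/* Illustrative sketch (not kernel code): record once, at setup time, which
 * optional callbacks an object provides, so fast paths test flag bits rather
 * than chasing the ops pointer. */
#include <stdio.h>
#include <string.h>

#define OP_HASH       0x01
#define OP_COMPARE    0x02
#define OP_REVALIDATE 0x04
#define OP_DELETE     0x08

struct name_ops {
        unsigned (*hash)(const char *name);
        int (*compare)(const char *a, const char *b);
        int (*revalidate)(void *obj);
        int (*delete_hint)(void *obj);
};

struct object {
        unsigned int flags;
        const struct name_ops *ops;
};

static void set_ops(struct object *o, const struct name_ops *ops)
{
        o->ops = ops;
        if (!ops)
                return;
        if (ops->hash)
                o->flags |= OP_HASH;
        if (ops->compare)
                o->flags |= OP_COMPARE;
        if (ops->revalidate)
                o->flags |= OP_REVALIDATE;
        if (ops->delete_hint)
                o->flags |= OP_DELETE;
}

static int names_differ(const struct object *o, const char *a, const char *b)
{
        /* fast path: one flag test, no o->ops dereference unless needed */
        if (o->flags & OP_COMPARE)
                return o->ops->compare(a, b);
        return strcmp(a, b) != 0;       /* default comparison */
}

int main(void)
{
        struct object o = { 0, NULL };

        set_ops(&o, NULL);
        printf("%d\n", names_differ(&o, "a", "a"));     /* prints 0 */
        return 0;
}
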
1377
1007static void __d_instantiate(struct dentry *dentry, struct inode *inode) 1378static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1008{ 1379{
1380 spin_lock(&dentry->d_lock);
1009 if (inode) 1381 if (inode)
1010 list_add(&dentry->d_alias, &inode->i_dentry); 1382 list_add(&dentry->d_alias, &inode->i_dentry);
1011 dentry->d_inode = inode; 1383 dentry->d_inode = inode;
1384 dentry_rcuwalk_barrier(dentry);
1385 spin_unlock(&dentry->d_lock);
1012 fsnotify_d_instantiate(dentry, inode); 1386 fsnotify_d_instantiate(dentry, inode);
1013} 1387}
1014 1388
@@ -1030,9 +1404,11 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1030void d_instantiate(struct dentry *entry, struct inode * inode) 1404void d_instantiate(struct dentry *entry, struct inode * inode)
1031{ 1405{
1032 BUG_ON(!list_empty(&entry->d_alias)); 1406 BUG_ON(!list_empty(&entry->d_alias));
1033 spin_lock(&dcache_lock); 1407 if (inode)
1408 spin_lock(&inode->i_lock);
1034 __d_instantiate(entry, inode); 1409 __d_instantiate(entry, inode);
1035 spin_unlock(&dcache_lock); 1410 if (inode)
1411 spin_unlock(&inode->i_lock);
1036 security_d_instantiate(entry, inode); 1412 security_d_instantiate(entry, inode);
1037} 1413}
1038EXPORT_SYMBOL(d_instantiate); 1414EXPORT_SYMBOL(d_instantiate);
@@ -1069,15 +1445,18 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1069 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1445 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
1070 struct qstr *qstr = &alias->d_name; 1446 struct qstr *qstr = &alias->d_name;
1071 1447
1448 /*
1449 * Don't need alias->d_lock here, because aliases with
1450 * d_parent == entry->d_parent are not subject to name or
1451 * parent changes, because the parent inode i_mutex is held.
1452 */
1072 if (qstr->hash != hash) 1453 if (qstr->hash != hash)
1073 continue; 1454 continue;
1074 if (alias->d_parent != entry->d_parent) 1455 if (alias->d_parent != entry->d_parent)
1075 continue; 1456 continue;
1076 if (qstr->len != len) 1457 if (dentry_cmp(qstr->name, qstr->len, name, len))
1077 continue; 1458 continue;
1078 if (memcmp(qstr->name, name, len)) 1459 __dget(alias);
1079 continue;
1080 dget_locked(alias);
1081 return alias; 1460 return alias;
1082 } 1461 }
1083 1462
@@ -1091,9 +1470,11 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1091 1470
1092 BUG_ON(!list_empty(&entry->d_alias)); 1471 BUG_ON(!list_empty(&entry->d_alias));
1093 1472
1094 spin_lock(&dcache_lock); 1473 if (inode)
1474 spin_lock(&inode->i_lock);
1095 result = __d_instantiate_unique(entry, inode); 1475 result = __d_instantiate_unique(entry, inode);
1096 spin_unlock(&dcache_lock); 1476 if (inode)
1477 spin_unlock(&inode->i_lock);
1097 1478
1098 if (!result) { 1479 if (!result) {
1099 security_d_instantiate(entry, inode); 1480 security_d_instantiate(entry, inode);
@@ -1134,14 +1515,6 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1134} 1515}
1135EXPORT_SYMBOL(d_alloc_root); 1516EXPORT_SYMBOL(d_alloc_root);
1136 1517
1137static inline struct hlist_head *d_hash(struct dentry *parent,
1138 unsigned long hash)
1139{
1140 hash += ((unsigned long) parent ^ GOLDEN_RATIO_PRIME) / L1_CACHE_BYTES;
1141 hash = hash ^ ((hash ^ GOLDEN_RATIO_PRIME) >> D_HASHBITS);
1142 return dentry_hashtable + (hash & D_HASHMASK);
1143}
1144
1145/** 1518/**
1146 * d_obtain_alias - find or allocate a dentry for a given inode 1519 * d_obtain_alias - find or allocate a dentry for a given inode
1147 * @inode: inode to allocate the dentry for 1520 * @inode: inode to allocate the dentry for
@@ -1182,10 +1555,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1182 } 1555 }
1183 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1556 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1184 1557
1185 spin_lock(&dcache_lock); 1558
1559 spin_lock(&inode->i_lock);
1186 res = __d_find_alias(inode, 0); 1560 res = __d_find_alias(inode, 0);
1187 if (res) { 1561 if (res) {
1188 spin_unlock(&dcache_lock); 1562 spin_unlock(&inode->i_lock);
1189 dput(tmp); 1563 dput(tmp);
1190 goto out_iput; 1564 goto out_iput;
1191 } 1565 }
@@ -1195,12 +1569,14 @@ struct dentry *d_obtain_alias(struct inode *inode)
1195 tmp->d_sb = inode->i_sb; 1569 tmp->d_sb = inode->i_sb;
1196 tmp->d_inode = inode; 1570 tmp->d_inode = inode;
1197 tmp->d_flags |= DCACHE_DISCONNECTED; 1571 tmp->d_flags |= DCACHE_DISCONNECTED;
1198 tmp->d_flags &= ~DCACHE_UNHASHED;
1199 list_add(&tmp->d_alias, &inode->i_dentry); 1572 list_add(&tmp->d_alias, &inode->i_dentry);
1200 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); 1573 bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1574 tmp->d_flags &= ~DCACHE_UNHASHED;
1575 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1576 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1201 spin_unlock(&tmp->d_lock); 1577 spin_unlock(&tmp->d_lock);
1578 spin_unlock(&inode->i_lock);
1202 1579
1203 spin_unlock(&dcache_lock);
1204 return tmp; 1580 return tmp;
1205 1581
1206 out_iput: 1582 out_iput:
@@ -1230,18 +1606,18 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1230 struct dentry *new = NULL; 1606 struct dentry *new = NULL;
1231 1607
1232 if (inode && S_ISDIR(inode->i_mode)) { 1608 if (inode && S_ISDIR(inode->i_mode)) {
1233 spin_lock(&dcache_lock); 1609 spin_lock(&inode->i_lock);
1234 new = __d_find_alias(inode, 1); 1610 new = __d_find_alias(inode, 1);
1235 if (new) { 1611 if (new) {
1236 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1612 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1237 spin_unlock(&dcache_lock); 1613 spin_unlock(&inode->i_lock);
1238 security_d_instantiate(new, inode); 1614 security_d_instantiate(new, inode);
1239 d_move(new, dentry); 1615 d_move(new, dentry);
1240 iput(inode); 1616 iput(inode);
1241 } else { 1617 } else {
1242 /* already taking dcache_lock, so d_add() by hand */ 1618 /* already taking inode->i_lock, so d_add() by hand */
1243 __d_instantiate(dentry, inode); 1619 __d_instantiate(dentry, inode);
1244 spin_unlock(&dcache_lock); 1620 spin_unlock(&inode->i_lock);
1245 security_d_instantiate(dentry, inode); 1621 security_d_instantiate(dentry, inode);
1246 d_rehash(dentry); 1622 d_rehash(dentry);
1247 } 1623 }
@@ -1314,10 +1690,10 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1314 * Negative dentry: instantiate it unless the inode is a directory and 1690 * Negative dentry: instantiate it unless the inode is a directory and
1315 * already has a dentry. 1691 * already has a dentry.
1316 */ 1692 */
1317 spin_lock(&dcache_lock); 1693 spin_lock(&inode->i_lock);
1318 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1694 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
1319 __d_instantiate(found, inode); 1695 __d_instantiate(found, inode);
1320 spin_unlock(&dcache_lock); 1696 spin_unlock(&inode->i_lock);
1321 security_d_instantiate(found, inode); 1697 security_d_instantiate(found, inode);
1322 return found; 1698 return found;
1323 } 1699 }
@@ -1327,8 +1703,8 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1327 * reference to it, move it in place and use it. 1703 * reference to it, move it in place and use it.
1328 */ 1704 */
1329 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1705 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1330 dget_locked(new); 1706 __dget(new);
1331 spin_unlock(&dcache_lock); 1707 spin_unlock(&inode->i_lock);
1332 security_d_instantiate(found, inode); 1708 security_d_instantiate(found, inode);
1333 d_move(new, found); 1709 d_move(new, found);
1334 iput(inode); 1710 iput(inode);
@@ -1342,6 +1718,112 @@ err_out:
1342EXPORT_SYMBOL(d_add_ci); 1718EXPORT_SYMBOL(d_add_ci);
1343 1719
1344/** 1720/**
1721 * __d_lookup_rcu - search for a dentry (racy, store-free)
1722 * @parent: parent dentry
1723 * @name: qstr of name we wish to find
1724 * @seq: returns d_seq value at the point where the dentry was found
1725 * @inode: returns dentry->d_inode when the inode was found valid.
1726 * Returns: dentry, or NULL
1727 *
1728 * __d_lookup_rcu is the dcache lookup function for rcu-walk name
1729 * resolution (store-free path walking) design described in
1730 * Documentation/filesystems/path-lookup.txt.
1731 *
1732 * This is not to be used outside core vfs.
1733 *
1734 * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
1735 * held, and rcu_read_lock held. The returned dentry must not be stored into
1736 * without taking d_lock and checking d_seq sequence count against @seq
1737 * returned here.
1738 *
1739 * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
1740 * function.
1741 *
1742 * Alternatively, __d_lookup_rcu may be called again to look up the child of
1743 * the returned dentry, so long as its parent's seqlock is checked after the
1744 * child is looked up. Thus, an interlocking stepping of sequence lock checks
1745 * is formed, giving integrity down the path walk.
1746 */
1747struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1748 unsigned *seq, struct inode **inode)
1749{
1750 unsigned int len = name->len;
1751 unsigned int hash = name->hash;
1752 const unsigned char *str = name->name;
1753 struct dcache_hash_bucket *b = d_hash(parent, hash);
1754 struct hlist_bl_node *node;
1755 struct dentry *dentry;
1756
1757 /*
1758 * Note: There is significant duplication with __d_lookup_rcu which is
1759 * required to prevent single threaded performance regressions
1760 * especially on architectures where smp_rmb (in seqcounts) are costly.
1761 * Keep the two functions in sync.
1762 */
1763
1764 /*
1765 * The hash list is protected using RCU.
1766 *
1767 * Carefully use d_seq when comparing a candidate dentry, to avoid
1768 * races with d_move().
1769 *
1770 * It is possible that concurrent renames can mess up our list
1771 * walk here and result in missing our dentry, resulting in the
1772 * false-negative result. d_lookup() protects against concurrent
1773 * renames using rename_lock seqlock.
1774 *
1775 * See Documentation/vfs/dcache-locking.txt for more details.
1776 */
1777 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1778 struct inode *i;
1779 const char *tname;
1780 int tlen;
1781
1782 if (dentry->d_name.hash != hash)
1783 continue;
1784
1785seqretry:
1786 *seq = read_seqcount_begin(&dentry->d_seq);
1787 if (dentry->d_parent != parent)
1788 continue;
1789 if (d_unhashed(dentry))
1790 continue;
1791 tlen = dentry->d_name.len;
1792 tname = dentry->d_name.name;
1793 i = dentry->d_inode;
1794 prefetch(tname);
1795 if (i)
1796 prefetch(i);
1797 /*
1798 * This seqcount check is required to ensure name and
1799 * len are loaded atomically, so as not to walk off the
1800 * edge of memory when walking. If we could load this
1801 * atomically some other way, we could drop this check.
1802 */
1803 if (read_seqcount_retry(&dentry->d_seq, *seq))
1804 goto seqretry;
1805 if (parent->d_flags & DCACHE_OP_COMPARE) {
1806 if (parent->d_op->d_compare(parent, *inode,
1807 dentry, i,
1808 tlen, tname, name))
1809 continue;
1810 } else {
1811 if (dentry_cmp(tname, tlen, str, len))
1812 continue;
1813 }
1814 /*
1815 * No extra seqcount check is required after the name
1816 * compare. The caller must perform a seqcount check in
1817 * order to do anything useful with the returned dentry
1818 * anyway.
1819 */
1820 *inode = i;
1821 return dentry;
1822 }
1823 return NULL;
1824}
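
The comment on __d_lookup_rcu() above stresses that the returned dentry is only a candidate: before the caller may keep it, it must take d_lock and re-check d_seq against the value the lookup returned. A compressed user-space sketch of that validate-before-referencing step is below; the structure and helper are invented and merely stand in for the kernel's __d_rcu_to_refcount() discipline.

/* Illustrative sketch (not kernel code): 'e' and 'seq' came from a lockless
 * lookup; a reference may only be taken after locking the object and
 * confirming nothing changed since the lookup. */
#include <pthread.h>
#include <stdatomic.h>

struct entry {
        pthread_mutex_t lock;           /* like d_lock */
        atomic_uint seq;                /* like d_seq */
        int refcount;
};

static int entry_ref_if_unchanged(struct entry *e, unsigned seq)
{
        int ok;

        pthread_mutex_lock(&e->lock);
        ok = (atomic_load(&e->seq) == seq);
        if (ok)
                e->refcount++;          /* safe: locked and validated */
        pthread_mutex_unlock(&e->lock);
        return ok;                      /* on failure, caller retries the slow path */
}

int main(void)
{
        static struct entry e = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
        unsigned seq = atomic_load(&e.seq);     /* pretend this came from the lookup */

        return entry_ref_if_unchanged(&e, seq) ? 0 : 1;
}
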
1825
1826/**
1345 * d_lookup - search for a dentry 1827 * d_lookup - search for a dentry
1346 * @parent: parent dentry 1828 * @parent: parent dentry
1347 * @name: qstr of name we wish to find 1829 * @name: qstr of name we wish to find
@@ -1352,10 +1834,10 @@ EXPORT_SYMBOL(d_add_ci);
1352 * dentry is returned. The caller must use dput to free the entry when it has 1834 * dentry is returned. The caller must use dput to free the entry when it has
1353 * finished using it. %NULL is returned if the dentry does not exist. 1835 * finished using it. %NULL is returned if the dentry does not exist.
1354 */ 1836 */
1355struct dentry * d_lookup(struct dentry * parent, struct qstr * name) 1837struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
1356{ 1838{
1357 struct dentry * dentry = NULL; 1839 struct dentry *dentry;
1358 unsigned long seq; 1840 unsigned seq;
1359 1841
1360 do { 1842 do {
1361 seq = read_seqbegin(&rename_lock); 1843 seq = read_seqbegin(&rename_lock);
@@ -1367,7 +1849,7 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
1367} 1849}
1368EXPORT_SYMBOL(d_lookup); 1850EXPORT_SYMBOL(d_lookup);
1369 1851
1370/* 1852/**
1371 * __d_lookup - search for a dentry (racy) 1853 * __d_lookup - search for a dentry (racy)
1372 * @parent: parent dentry 1854 * @parent: parent dentry
1373 * @name: qstr of name we wish to find 1855 * @name: qstr of name we wish to find
@@ -1382,17 +1864,24 @@ EXPORT_SYMBOL(d_lookup);
1382 * 1864 *
1383 * __d_lookup callers must be commented. 1865 * __d_lookup callers must be commented.
1384 */ 1866 */
1385struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) 1867struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1386{ 1868{
1387 unsigned int len = name->len; 1869 unsigned int len = name->len;
1388 unsigned int hash = name->hash; 1870 unsigned int hash = name->hash;
1389 const unsigned char *str = name->name; 1871 const unsigned char *str = name->name;
1390 struct hlist_head *head = d_hash(parent,hash); 1872 struct dcache_hash_bucket *b = d_hash(parent, hash);
1873 struct hlist_bl_node *node;
1391 struct dentry *found = NULL; 1874 struct dentry *found = NULL;
1392 struct hlist_node *node;
1393 struct dentry *dentry; 1875 struct dentry *dentry;
1394 1876
1395 /* 1877 /*
1878 * Note: There is significant duplication with __d_lookup_rcu which is
1879 * required to prevent single threaded performance regressions
1880 * especially on architectures where smp_rmb (in seqcounts) are costly.
1881 * Keep the two functions in sync.
1882 */
1883
1884 /*
1396 * The hash list is protected using RCU. 1885 * The hash list is protected using RCU.
1397 * 1886 *
1398 * Take d_lock when comparing a candidate dentry, to avoid races 1887 * Take d_lock when comparing a candidate dentry, to avoid races
@@ -1407,25 +1896,16 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1407 */ 1896 */
1408 rcu_read_lock(); 1897 rcu_read_lock();
1409 1898
1410 hlist_for_each_entry_rcu(dentry, node, head, d_hash) { 1899 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1411 struct qstr *qstr; 1900 const char *tname;
1901 int tlen;
1412 1902
1413 if (dentry->d_name.hash != hash) 1903 if (dentry->d_name.hash != hash)
1414 continue; 1904 continue;
1415 if (dentry->d_parent != parent)
1416 continue;
1417 1905
1418 spin_lock(&dentry->d_lock); 1906 spin_lock(&dentry->d_lock);
1419
1420 /*
1421 * Recheck the dentry after taking the lock - d_move may have
1422 * changed things. Don't bother checking the hash because
1423 * we're about to compare the whole name anyway.
1424 */
1425 if (dentry->d_parent != parent) 1907 if (dentry->d_parent != parent)
1426 goto next; 1908 goto next;
1427
1428 /* non-existing due to RCU? */
1429 if (d_unhashed(dentry)) 1909 if (d_unhashed(dentry))
1430 goto next; 1910 goto next;
1431 1911
@@ -1433,18 +1913,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
1433 * It is safe to compare names since d_move() cannot 1913 * It is safe to compare names since d_move() cannot
1434 * change the qstr (protected by d_lock). 1914 * change the qstr (protected by d_lock).
1435 */ 1915 */
1436 qstr = &dentry->d_name; 1916 tlen = dentry->d_name.len;
1437 if (parent->d_op && parent->d_op->d_compare) { 1917 tname = dentry->d_name.name;
1438 if (parent->d_op->d_compare(parent, qstr, name)) 1918 if (parent->d_flags & DCACHE_OP_COMPARE) {
1919 if (parent->d_op->d_compare(parent, parent->d_inode,
1920 dentry, dentry->d_inode,
1921 tlen, tname, name))
1439 goto next; 1922 goto next;
1440 } else { 1923 } else {
1441 if (qstr->len != len) 1924 if (dentry_cmp(tname, tlen, str, len))
1442 goto next;
1443 if (memcmp(qstr->name, str, len))
1444 goto next; 1925 goto next;
1445 } 1926 }
1446 1927
1447 atomic_inc(&dentry->d_count); 1928 dentry->d_count++;
1448 found = dentry; 1929 found = dentry;
1449 spin_unlock(&dentry->d_lock); 1930 spin_unlock(&dentry->d_lock);
1450 break; 1931 break;
@@ -1473,8 +1954,8 @@ struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
1473 * routine may choose to leave the hash value unchanged. 1954 * routine may choose to leave the hash value unchanged.
1474 */ 1955 */
1475 name->hash = full_name_hash(name->name, name->len); 1956 name->hash = full_name_hash(name->name, name->len);
1476 if (dir->d_op && dir->d_op->d_hash) { 1957 if (dir->d_flags & DCACHE_OP_HASH) {
1477 if (dir->d_op->d_hash(dir, name) < 0) 1958 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
1478 goto out; 1959 goto out;
1479 } 1960 }
1480 dentry = d_lookup(dir, name); 1961 dentry = d_lookup(dir, name);
@@ -1483,34 +1964,32 @@ out:
1483} 1964}
1484 1965
1485/** 1966/**
1486 * d_validate - verify dentry provided from insecure source 1967 * d_validate - verify dentry provided from insecure source (deprecated)
1487 * @dentry: The dentry alleged to be valid child of @dparent 1968 * @dentry: The dentry alleged to be valid child of @dparent
1488 * @dparent: The parent dentry (known to be valid) 1969 * @dparent: The parent dentry (known to be valid)
1489 * 1970 *
1490 * An insecure source has sent us a dentry, here we verify it and dget() it. 1971 * An insecure source has sent us a dentry, here we verify it and dget() it.
1491 * This is used by ncpfs in its readdir implementation. 1972 * This is used by ncpfs in its readdir implementation.
 1492 * Zero is returned if the dentry is invalid. 1973 * Zero is returned if the dentry is invalid.
1974 *
1975 * This function is slow for big directories, and deprecated, do not use it.
1493 */ 1976 */
1494int d_validate(struct dentry *dentry, struct dentry *parent) 1977int d_validate(struct dentry *dentry, struct dentry *dparent)
1495{ 1978{
1496 struct hlist_head *head = d_hash(parent, dentry->d_name.hash); 1979 struct dentry *child;
1497 struct hlist_node *node;
1498 struct dentry *d;
1499
1500 /* Check whether the ptr might be valid at all.. */
1501 if (!kmem_ptr_validate(dentry_cache, dentry))
1502 return 0;
1503 if (dentry->d_parent != parent)
1504 return 0;
1505 1980
1506 rcu_read_lock(); 1981 spin_lock(&dparent->d_lock);
1507 hlist_for_each_entry_rcu(d, node, head, d_hash) { 1982 list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
1508 if (d == dentry) { 1983 if (dentry == child) {
1509 dget(dentry); 1984 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1985 __dget_dlock(dentry);
1986 spin_unlock(&dentry->d_lock);
1987 spin_unlock(&dparent->d_lock);
1510 return 1; 1988 return 1;
1511 } 1989 }
1512 } 1990 }
1513 rcu_read_unlock(); 1991 spin_unlock(&dparent->d_lock);
1992
1514 return 0; 1993 return 0;
1515} 1994}
1516EXPORT_SYMBOL(d_validate); 1995EXPORT_SYMBOL(d_validate);
@@ -1538,16 +2017,23 @@ EXPORT_SYMBOL(d_validate);
1538 2017
1539void d_delete(struct dentry * dentry) 2018void d_delete(struct dentry * dentry)
1540{ 2019{
2020 struct inode *inode;
1541 int isdir = 0; 2021 int isdir = 0;
1542 /* 2022 /*
1543 * Are we the only user? 2023 * Are we the only user?
1544 */ 2024 */
1545 spin_lock(&dcache_lock); 2025again:
1546 spin_lock(&dentry->d_lock); 2026 spin_lock(&dentry->d_lock);
1547 isdir = S_ISDIR(dentry->d_inode->i_mode); 2027 inode = dentry->d_inode;
1548 if (atomic_read(&dentry->d_count) == 1) { 2028 isdir = S_ISDIR(inode->i_mode);
2029 if (dentry->d_count == 1) {
2030 if (inode && !spin_trylock(&inode->i_lock)) {
2031 spin_unlock(&dentry->d_lock);
2032 cpu_relax();
2033 goto again;
2034 }
1549 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2035 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
1550 dentry_iput(dentry); 2036 dentry_unlink_inode(dentry);
1551 fsnotify_nameremove(dentry, isdir); 2037 fsnotify_nameremove(dentry, isdir);
1552 return; 2038 return;
1553 } 2039 }
@@ -1556,17 +2042,18 @@ void d_delete(struct dentry * dentry)
1556 __d_drop(dentry); 2042 __d_drop(dentry);
1557 2043
1558 spin_unlock(&dentry->d_lock); 2044 spin_unlock(&dentry->d_lock);
1559 spin_unlock(&dcache_lock);
1560 2045
1561 fsnotify_nameremove(dentry, isdir); 2046 fsnotify_nameremove(dentry, isdir);
1562} 2047}
1563EXPORT_SYMBOL(d_delete); 2048EXPORT_SYMBOL(d_delete);
1564 2049
1565static void __d_rehash(struct dentry * entry, struct hlist_head *list) 2050static void __d_rehash(struct dentry * entry, struct dcache_hash_bucket *b)
1566{ 2051{
1567 2052 BUG_ON(!d_unhashed(entry));
2053 spin_lock_bucket(b);
1568 entry->d_flags &= ~DCACHE_UNHASHED; 2054 entry->d_flags &= ~DCACHE_UNHASHED;
1569 hlist_add_head_rcu(&entry->d_hash, list); 2055 hlist_bl_add_head_rcu(&entry->d_hash, &b->head);
2056 spin_unlock_bucket(b);
1570} 2057}
1571 2058
1572static void _d_rehash(struct dentry * entry) 2059static void _d_rehash(struct dentry * entry)
@@ -1583,25 +2070,39 @@ static void _d_rehash(struct dentry * entry)
1583 2070
1584void d_rehash(struct dentry * entry) 2071void d_rehash(struct dentry * entry)
1585{ 2072{
1586 spin_lock(&dcache_lock);
1587 spin_lock(&entry->d_lock); 2073 spin_lock(&entry->d_lock);
1588 _d_rehash(entry); 2074 _d_rehash(entry);
1589 spin_unlock(&entry->d_lock); 2075 spin_unlock(&entry->d_lock);
1590 spin_unlock(&dcache_lock);
1591} 2076}
1592EXPORT_SYMBOL(d_rehash); 2077EXPORT_SYMBOL(d_rehash);
1593 2078
1594/* 2079/**
1595 * When switching names, the actual string doesn't strictly have to 2080 * dentry_update_name_case - update case insensitive dentry with a new name
1596 * be preserved in the target - because we're dropping the target 2081 * @dentry: dentry to be updated
1597 * anyway. As such, we can just do a simple memcpy() to copy over 2082 * @name: new name
1598 * the new name before we switch.
1599 * 2083 *
1600 * Note that we have to be a lot more careful about getting the hash 2084 * Update a case insensitive dentry with new case of name.
1601 * switched - we have to switch the hash value properly even if it 2085 *
1602 * then no longer matches the actual (corrupted) string of the target. 2086 * dentry must have been returned by d_lookup with name @name. Old and new
1603 * The hash value has to match the hash queue that the dentry is on.. 2087 * name lengths must match (ie. no d_compare which allows mismatched name
2088 * lengths).
2089 *
2090 * Parent inode i_mutex must be held over d_lookup and into this call (to
2091 * keep renames and concurrent inserts, and readdir(2) away).
1604 */ 2092 */
2093void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
2094{
2095 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
2096 BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
2097
2098 spin_lock(&dentry->d_lock);
2099 write_seqcount_begin(&dentry->d_seq);
2100 memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
2101 write_seqcount_end(&dentry->d_seq);
2102 spin_unlock(&dentry->d_lock);
2103}
2104EXPORT_SYMBOL(dentry_update_name_case);
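
dentry_update_name_case() shows the write side of the same d_seq protocol: the in-place name change is bracketed by write_seqcount_begin()/end() under d_lock, so a lockless reader that raced with it sees a changed sequence count and retries. A matching user-space sketch, the counterpart of the validation sketch after __d_lookup_rcu above, with invented names:

/* Illustrative sketch (not kernel code): make the sequence count odd while
 * the name is rewritten in place, so concurrent lockless readers notice the
 * update and retry. */
#include <pthread.h>
#include <stdatomic.h>
#include <string.h>

struct entry {
        pthread_mutex_t lock;           /* like d_lock */
        atomic_uint seq;                /* odd while an update is in flight */
        char name[32];
};

static void entry_set_name(struct entry *e, const char *name)
{
        pthread_mutex_lock(&e->lock);
        atomic_fetch_add(&e->seq, 1);   /* like write_seqcount_begin() */
        strncpy(e->name, name, sizeof(e->name) - 1);
        e->name[sizeof(e->name) - 1] = '\0';
        atomic_fetch_add(&e->seq, 1);   /* like write_seqcount_end() */
        pthread_mutex_unlock(&e->lock);
}

int main(void)
{
        static struct entry e = { PTHREAD_MUTEX_INITIALIZER, 0, "" };

        entry_set_name(&e, "newname");
        return 0;
}
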
2105
1605static void switch_names(struct dentry *dentry, struct dentry *target) 2106static void switch_names(struct dentry *dentry, struct dentry *target)
1606{ 2107{
1607 if (dname_external(target)) { 2108 if (dname_external(target)) {
@@ -1643,54 +2144,84 @@ static void switch_names(struct dentry *dentry, struct dentry *target)
1643 swap(dentry->d_name.len, target->d_name.len); 2144 swap(dentry->d_name.len, target->d_name.len);
1644} 2145}
1645 2146
2147static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2148{
2149 /*
2150 * XXXX: do we really need to take target->d_lock?
2151 */
2152 if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2153 spin_lock(&target->d_parent->d_lock);
2154 else {
2155 if (d_ancestor(dentry->d_parent, target->d_parent)) {
2156 spin_lock(&dentry->d_parent->d_lock);
2157 spin_lock_nested(&target->d_parent->d_lock,
2158 DENTRY_D_LOCK_NESTED);
2159 } else {
2160 spin_lock(&target->d_parent->d_lock);
2161 spin_lock_nested(&dentry->d_parent->d_lock,
2162 DENTRY_D_LOCK_NESTED);
2163 }
2164 }
2165 if (target < dentry) {
2166 spin_lock_nested(&target->d_lock, 2);
2167 spin_lock_nested(&dentry->d_lock, 3);
2168 } else {
2169 spin_lock_nested(&dentry->d_lock, 2);
2170 spin_lock_nested(&target->d_lock, 3);
2171 }
2172}
2173
2174static void dentry_unlock_parents_for_move(struct dentry *dentry,
2175 struct dentry *target)
2176{
2177 if (target->d_parent != dentry->d_parent)
2178 spin_unlock(&dentry->d_parent->d_lock);
2179 if (target->d_parent != target)
2180 spin_unlock(&target->d_parent->d_lock);
2181}
2182
1646/* 2183/*
1647 * We cannibalize "target" when moving dentry on top of it, 2184 * When switching names, the actual string doesn't strictly have to
1648 * because it's going to be thrown away anyway. We could be more 2185 * be preserved in the target - because we're dropping the target
1649 * polite about it, though. 2186 * anyway. As such, we can just do a simple memcpy() to copy over
1650 * 2187 * the new name before we switch.
1651 * This forceful removal will result in ugly /proc output if 2188 *
1652 * somebody holds a file open that got deleted due to a rename. 2189 * Note that we have to be a lot more careful about getting the hash
1653 * We could be nicer about the deleted file, and let it show 2190 * switched - we have to switch the hash value properly even if it
1654 * up under the name it had before it was deleted rather than 2191 * then no longer matches the actual (corrupted) string of the target.
1655 * under the original name of the file that was moved on top of it. 2192 * The hash value has to match the hash queue that the dentry is on..
1656 */ 2193 */
1657
1658/* 2194/*
1659 * d_move_locked - move a dentry 2195 * d_move - move a dentry
1660 * @dentry: entry to move 2196 * @dentry: entry to move
1661 * @target: new dentry 2197 * @target: new dentry
1662 * 2198 *
1663 * Update the dcache to reflect the move of a file name. Negative 2199 * Update the dcache to reflect the move of a file name. Negative
1664 * dcache entries should not be moved in this way. 2200 * dcache entries should not be moved in this way.
1665 */ 2201 */
1666static void d_move_locked(struct dentry * dentry, struct dentry * target) 2202void d_move(struct dentry * dentry, struct dentry * target)
1667{ 2203{
1668 struct hlist_head *list;
1669
1670 if (!dentry->d_inode) 2204 if (!dentry->d_inode)
1671 printk(KERN_WARNING "VFS: moving negative dcache entry\n"); 2205 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
1672 2206
2207 BUG_ON(d_ancestor(dentry, target));
2208 BUG_ON(d_ancestor(target, dentry));
2209
1673 write_seqlock(&rename_lock); 2210 write_seqlock(&rename_lock);
1674 /*
1675 * XXXX: do we really need to take target->d_lock?
1676 */
1677 if (target < dentry) {
1678 spin_lock(&target->d_lock);
1679 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1680 } else {
1681 spin_lock(&dentry->d_lock);
1682 spin_lock_nested(&target->d_lock, DENTRY_D_LOCK_NESTED);
1683 }
1684 2211
1685 /* Move the dentry to the target hash queue, if on different bucket */ 2212 dentry_lock_for_move(dentry, target);
1686 if (d_unhashed(dentry))
1687 goto already_unhashed;
1688 2213
1689 hlist_del_rcu(&dentry->d_hash); 2214 write_seqcount_begin(&dentry->d_seq);
2215 write_seqcount_begin(&target->d_seq);
1690 2216
1691already_unhashed: 2217 /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
1692 list = d_hash(target->d_parent, target->d_name.hash); 2218
1693 __d_rehash(dentry, list); 2219 /*
2220 * Move the dentry to the target hash queue. Don't bother checking
2221 * for the same hash queue because of how unlikely it is.
2222 */
2223 __d_drop(dentry);
2224 __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
1694 2225
1695 /* Unhash the target: dput() will then get rid of it */ 2226 /* Unhash the target: dput() will then get rid of it */
1696 __d_drop(target); 2227 __d_drop(target);
@@ -1715,27 +2246,16 @@ already_unhashed:
1715 } 2246 }
1716 2247
1717 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 2248 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
2249
2250 write_seqcount_end(&target->d_seq);
2251 write_seqcount_end(&dentry->d_seq);
2252
2253 dentry_unlock_parents_for_move(dentry, target);
1718 spin_unlock(&target->d_lock); 2254 spin_unlock(&target->d_lock);
1719 fsnotify_d_move(dentry); 2255 fsnotify_d_move(dentry);
1720 spin_unlock(&dentry->d_lock); 2256 spin_unlock(&dentry->d_lock);
1721 write_sequnlock(&rename_lock); 2257 write_sequnlock(&rename_lock);
1722} 2258}
1723
1724/**
1725 * d_move - move a dentry
1726 * @dentry: entry to move
1727 * @target: new dentry
1728 *
1729 * Update the dcache to reflect the move of a file name. Negative
1730 * dcache entries should not be moved in this way.
1731 */
1732
1733void d_move(struct dentry * dentry, struct dentry * target)
1734{
1735 spin_lock(&dcache_lock);
1736 d_move_locked(dentry, target);
1737 spin_unlock(&dcache_lock);
1738}
1739EXPORT_SYMBOL(d_move); 2259EXPORT_SYMBOL(d_move);
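
dentry_lock_for_move() above orders the locks it needs: the two parents first (nested by ancestry), then the two dentries by pointer address, so concurrent d_move() calls on the same pair can never acquire them in opposite order. The ancestry handling is kernel-specific; the address-ordering half is sketched below in plain pthreads with illustrative names.

/* Illustrative sketch (not kernel code): when two distinct objects must both
 * be locked, acquire them in one global order (here, by address) so callers
 * working on the same pair cannot deadlock. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

struct node {
        pthread_mutex_t lock;
        int value;
};

static void lock_pair(struct node *a, struct node *b)
{
        if ((uintptr_t)a < (uintptr_t)b) {      /* pointer order is the global order */
                pthread_mutex_lock(&a->lock);
                pthread_mutex_lock(&b->lock);
        } else {
                pthread_mutex_lock(&b->lock);
                pthread_mutex_lock(&a->lock);
        }
}

static void unlock_pair(struct node *a, struct node *b)
{
        pthread_mutex_unlock(&a->lock);
        pthread_mutex_unlock(&b->lock);
}

static void swap_values(struct node *a, struct node *b)
{
        lock_pair(a, b);                /* assumes a != b */
        int tmp = a->value;
        a->value = b->value;
        b->value = tmp;
        unlock_pair(a, b);
}

int main(void)
{
        static struct node x = { PTHREAD_MUTEX_INITIALIZER, 1 };
        static struct node y = { PTHREAD_MUTEX_INITIALIZER, 2 };

        swap_values(&x, &y);
        printf("%d %d\n", x.value, y.value);    /* prints 2 1 */
        return 0;
}
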
1740 2260
1741/** 2261/**
@@ -1761,13 +2281,13 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1761 * This helper attempts to cope with remotely renamed directories 2281 * This helper attempts to cope with remotely renamed directories
1762 * 2282 *
1763 * It assumes that the caller is already holding 2283 * It assumes that the caller is already holding
1764 * dentry->d_parent->d_inode->i_mutex and the dcache_lock 2284 * dentry->d_parent->d_inode->i_mutex and the inode->i_lock
1765 * 2285 *
1766 * Note: If ever the locking in lock_rename() changes, then please 2286 * Note: If ever the locking in lock_rename() changes, then please
1767 * remember to update this too... 2287 * remember to update this too...
1768 */ 2288 */
1769static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) 2289static struct dentry *__d_unalias(struct inode *inode,
1770 __releases(dcache_lock) 2290 struct dentry *dentry, struct dentry *alias)
1771{ 2291{
1772 struct mutex *m1 = NULL, *m2 = NULL; 2292 struct mutex *m1 = NULL, *m2 = NULL;
1773 struct dentry *ret; 2293 struct dentry *ret;
@@ -1790,10 +2310,10 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1790 goto out_err; 2310 goto out_err;
1791 m2 = &alias->d_parent->d_inode->i_mutex; 2311 m2 = &alias->d_parent->d_inode->i_mutex;
1792out_unalias: 2312out_unalias:
1793 d_move_locked(alias, dentry); 2313 d_move(alias, dentry);
1794 ret = alias; 2314 ret = alias;
1795out_err: 2315out_err:
1796 spin_unlock(&dcache_lock); 2316 spin_unlock(&inode->i_lock);
1797 if (m2) 2317 if (m2)
1798 mutex_unlock(m2); 2318 mutex_unlock(m2);
1799 if (m1) 2319 if (m1)
@@ -1804,17 +2324,23 @@ out_err:
1804/* 2324/*
1805 * Prepare an anonymous dentry for life in the superblock's dentry tree as a 2325 * Prepare an anonymous dentry for life in the superblock's dentry tree as a
1806 * named dentry in place of the dentry to be replaced. 2326 * named dentry in place of the dentry to be replaced.
2327 * returns with anon->d_lock held!
1807 */ 2328 */
1808static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) 2329static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1809{ 2330{
1810 struct dentry *dparent, *aparent; 2331 struct dentry *dparent, *aparent;
1811 2332
1812 switch_names(dentry, anon); 2333 dentry_lock_for_move(anon, dentry);
1813 swap(dentry->d_name.hash, anon->d_name.hash); 2334
2335 write_seqcount_begin(&dentry->d_seq);
2336 write_seqcount_begin(&anon->d_seq);
1814 2337
1815 dparent = dentry->d_parent; 2338 dparent = dentry->d_parent;
1816 aparent = anon->d_parent; 2339 aparent = anon->d_parent;
1817 2340
2341 switch_names(dentry, anon);
2342 swap(dentry->d_name.hash, anon->d_name.hash);
2343
1818 dentry->d_parent = (aparent == anon) ? dentry : aparent; 2344 dentry->d_parent = (aparent == anon) ? dentry : aparent;
1819 list_del(&dentry->d_u.d_child); 2345 list_del(&dentry->d_u.d_child);
1820 if (!IS_ROOT(dentry)) 2346 if (!IS_ROOT(dentry))
@@ -1829,6 +2355,13 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
1829 else 2355 else
1830 INIT_LIST_HEAD(&anon->d_u.d_child); 2356 INIT_LIST_HEAD(&anon->d_u.d_child);
1831 2357
2358 write_seqcount_end(&dentry->d_seq);
2359 write_seqcount_end(&anon->d_seq);
2360
2361 dentry_unlock_parents_for_move(anon, dentry);
2362 spin_unlock(&dentry->d_lock);
2363
2364 /* anon->d_lock still locked, returns locked */
1832 anon->d_flags &= ~DCACHE_DISCONNECTED; 2365 anon->d_flags &= ~DCACHE_DISCONNECTED;
1833} 2366}
1834 2367
@@ -1846,14 +2379,15 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1846 2379
1847 BUG_ON(!d_unhashed(dentry)); 2380 BUG_ON(!d_unhashed(dentry));
1848 2381
1849 spin_lock(&dcache_lock);
1850
1851 if (!inode) { 2382 if (!inode) {
1852 actual = dentry; 2383 actual = dentry;
1853 __d_instantiate(dentry, NULL); 2384 __d_instantiate(dentry, NULL);
1854 goto found_lock; 2385 d_rehash(actual);
2386 goto out_nolock;
1855 } 2387 }
1856 2388
2389 spin_lock(&inode->i_lock);
2390
1857 if (S_ISDIR(inode->i_mode)) { 2391 if (S_ISDIR(inode->i_mode)) {
1858 struct dentry *alias; 2392 struct dentry *alias;
1859 2393
@@ -1864,13 +2398,12 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1864 /* Is this an anonymous mountpoint that we could splice 2398 /* Is this an anonymous mountpoint that we could splice
1865 * into our tree? */ 2399 * into our tree? */
1866 if (IS_ROOT(alias)) { 2400 if (IS_ROOT(alias)) {
1867 spin_lock(&alias->d_lock);
1868 __d_materialise_dentry(dentry, alias); 2401 __d_materialise_dentry(dentry, alias);
1869 __d_drop(alias); 2402 __d_drop(alias);
1870 goto found; 2403 goto found;
1871 } 2404 }
1872 /* Nope, but we must(!) avoid directory aliasing */ 2405 /* Nope, but we must(!) avoid directory aliasing */
1873 actual = __d_unalias(dentry, alias); 2406 actual = __d_unalias(inode, dentry, alias);
1874 if (IS_ERR(actual)) 2407 if (IS_ERR(actual))
1875 dput(alias); 2408 dput(alias);
1876 goto out_nolock; 2409 goto out_nolock;
@@ -1881,15 +2414,14 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1881 actual = __d_instantiate_unique(dentry, inode); 2414 actual = __d_instantiate_unique(dentry, inode);
1882 if (!actual) 2415 if (!actual)
1883 actual = dentry; 2416 actual = dentry;
1884 else if (unlikely(!d_unhashed(actual))) 2417 else
1885 goto shouldnt_be_hashed; 2418 BUG_ON(!d_unhashed(actual));
1886 2419
1887found_lock:
1888 spin_lock(&actual->d_lock); 2420 spin_lock(&actual->d_lock);
1889found: 2421found:
1890 _d_rehash(actual); 2422 _d_rehash(actual);
1891 spin_unlock(&actual->d_lock); 2423 spin_unlock(&actual->d_lock);
1892 spin_unlock(&dcache_lock); 2424 spin_unlock(&inode->i_lock);
1893out_nolock: 2425out_nolock:
1894 if (actual == dentry) { 2426 if (actual == dentry) {
1895 security_d_instantiate(dentry, inode); 2427 security_d_instantiate(dentry, inode);
@@ -1898,10 +2430,6 @@ out_nolock:
1898 2430
1899 iput(inode); 2431 iput(inode);
1900 return actual; 2432 return actual;
1901
1902shouldnt_be_hashed:
1903 spin_unlock(&dcache_lock);
1904 BUG();
1905} 2433}
1906EXPORT_SYMBOL_GPL(d_materialise_unique); 2434EXPORT_SYMBOL_GPL(d_materialise_unique);
1907 2435
@@ -1928,7 +2456,7 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
1928 * @buffer: pointer to the end of the buffer 2456 * @buffer: pointer to the end of the buffer
1929 * @buflen: pointer to buffer length 2457 * @buflen: pointer to buffer length
1930 * 2458 *
1931 * Caller holds the dcache_lock. 2459 * Caller holds the rename_lock.
1932 * 2460 *
1933 * If path is not reachable from the supplied root, then the value of 2461 * If path is not reachable from the supplied root, then the value of
1934 * root is changed (without modifying refcounts). 2462 * root is changed (without modifying refcounts).
@@ -1956,7 +2484,9 @@ static int prepend_path(const struct path *path, struct path *root,
1956 } 2484 }
1957 parent = dentry->d_parent; 2485 parent = dentry->d_parent;
1958 prefetch(parent); 2486 prefetch(parent);
2487 spin_lock(&dentry->d_lock);
1959 error = prepend_name(buffer, buflen, &dentry->d_name); 2488 error = prepend_name(buffer, buflen, &dentry->d_name);
2489 spin_unlock(&dentry->d_lock);
1960 if (!error) 2490 if (!error)
1961 error = prepend(buffer, buflen, "/", 1); 2491 error = prepend(buffer, buflen, "/", 1);
1962 if (error) 2492 if (error)
@@ -2012,9 +2542,9 @@ char *__d_path(const struct path *path, struct path *root,
2012 int error; 2542 int error;
2013 2543
2014 prepend(&res, &buflen, "\0", 1); 2544 prepend(&res, &buflen, "\0", 1);
2015 spin_lock(&dcache_lock); 2545 write_seqlock(&rename_lock);
2016 error = prepend_path(path, root, &res, &buflen); 2546 error = prepend_path(path, root, &res, &buflen);
2017 spin_unlock(&dcache_lock); 2547 write_sequnlock(&rename_lock);
2018 2548
2019 if (error) 2549 if (error)
2020 return ERR_PTR(error); 2550 return ERR_PTR(error);
@@ -2076,12 +2606,12 @@ char *d_path(const struct path *path, char *buf, int buflen)
2076 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2606 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2077 2607
2078 get_fs_root(current->fs, &root); 2608 get_fs_root(current->fs, &root);
2079 spin_lock(&dcache_lock); 2609 write_seqlock(&rename_lock);
2080 tmp = root; 2610 tmp = root;
2081 error = path_with_deleted(path, &tmp, &res, &buflen); 2611 error = path_with_deleted(path, &tmp, &res, &buflen);
2082 if (error) 2612 if (error)
2083 res = ERR_PTR(error); 2613 res = ERR_PTR(error);
2084 spin_unlock(&dcache_lock); 2614 write_sequnlock(&rename_lock);
2085 path_put(&root); 2615 path_put(&root);
2086 return res; 2616 return res;
2087} 2617}
@@ -2107,12 +2637,12 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
2107 return path->dentry->d_op->d_dname(path->dentry, buf, buflen); 2637 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
2108 2638
2109 get_fs_root(current->fs, &root); 2639 get_fs_root(current->fs, &root);
2110 spin_lock(&dcache_lock); 2640 write_seqlock(&rename_lock);
2111 tmp = root; 2641 tmp = root;
2112 error = path_with_deleted(path, &tmp, &res, &buflen); 2642 error = path_with_deleted(path, &tmp, &res, &buflen);
2113 if (!error && !path_equal(&tmp, &root)) 2643 if (!error && !path_equal(&tmp, &root))
2114 error = prepend_unreachable(&res, &buflen); 2644 error = prepend_unreachable(&res, &buflen);
2115 spin_unlock(&dcache_lock); 2645 write_sequnlock(&rename_lock);
2116 path_put(&root); 2646 path_put(&root);
2117 if (error) 2647 if (error)
2118 res = ERR_PTR(error); 2648 res = ERR_PTR(error);
@@ -2144,7 +2674,7 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
2144/* 2674/*
2145 * Write full pathname from the root of the filesystem into the buffer. 2675 * Write full pathname from the root of the filesystem into the buffer.
2146 */ 2676 */
2147char *__dentry_path(struct dentry *dentry, char *buf, int buflen) 2677static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2148{ 2678{
2149 char *end = buf + buflen; 2679 char *end = buf + buflen;
2150 char *retval; 2680 char *retval;
@@ -2158,10 +2688,13 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2158 2688
2159 while (!IS_ROOT(dentry)) { 2689 while (!IS_ROOT(dentry)) {
2160 struct dentry *parent = dentry->d_parent; 2690 struct dentry *parent = dentry->d_parent;
2691 int error;
2161 2692
2162 prefetch(parent); 2693 prefetch(parent);
2163 if ((prepend_name(&end, &buflen, &dentry->d_name) != 0) || 2694 spin_lock(&dentry->d_lock);
2164 (prepend(&end, &buflen, "/", 1) != 0)) 2695 error = prepend_name(&end, &buflen, &dentry->d_name);
2696 spin_unlock(&dentry->d_lock);
2697 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
2165 goto Elong; 2698 goto Elong;
2166 2699
2167 retval = end; 2700 retval = end;
@@ -2171,14 +2704,25 @@ char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
2171Elong: 2704Elong:
2172 return ERR_PTR(-ENAMETOOLONG); 2705 return ERR_PTR(-ENAMETOOLONG);
2173} 2706}
2174EXPORT_SYMBOL(__dentry_path); 2707
2708char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
2709{
2710 char *retval;
2711
2712 write_seqlock(&rename_lock);
2713 retval = __dentry_path(dentry, buf, buflen);
2714 write_sequnlock(&rename_lock);
2715
2716 return retval;
2717}
2718EXPORT_SYMBOL(dentry_path_raw);
2175 2719
2176char *dentry_path(struct dentry *dentry, char *buf, int buflen) 2720char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2177{ 2721{
2178 char *p = NULL; 2722 char *p = NULL;
2179 char *retval; 2723 char *retval;
2180 2724
2181 spin_lock(&dcache_lock); 2725 write_seqlock(&rename_lock);
2182 if (d_unlinked(dentry)) { 2726 if (d_unlinked(dentry)) {
2183 p = buf + buflen; 2727 p = buf + buflen;
2184 if (prepend(&p, &buflen, "//deleted", 10) != 0) 2728 if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2186,12 +2730,11 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
2186 buflen++; 2730 buflen++;
2187 } 2731 }
2188 retval = __dentry_path(dentry, buf, buflen); 2732 retval = __dentry_path(dentry, buf, buflen);
2189 spin_unlock(&dcache_lock); 2733 write_sequnlock(&rename_lock);
2190 if (!IS_ERR(retval) && p) 2734 if (!IS_ERR(retval) && p)
 2191 *p = '/'; /* restore '/' overridden with '\0' */ 2735 *p = '/'; /* restore '/' overridden with '\0' */
2192 return retval; 2736 return retval;
2193Elong: 2737Elong:
2194 spin_unlock(&dcache_lock);
2195 return ERR_PTR(-ENAMETOOLONG); 2738 return ERR_PTR(-ENAMETOOLONG);
2196} 2739}
2197 2740
@@ -2225,7 +2768,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2225 get_fs_root_and_pwd(current->fs, &root, &pwd); 2768 get_fs_root_and_pwd(current->fs, &root, &pwd);
2226 2769
2227 error = -ENOENT; 2770 error = -ENOENT;
2228 spin_lock(&dcache_lock); 2771 write_seqlock(&rename_lock);
2229 if (!d_unlinked(pwd.dentry)) { 2772 if (!d_unlinked(pwd.dentry)) {
2230 unsigned long len; 2773 unsigned long len;
2231 struct path tmp = root; 2774 struct path tmp = root;
@@ -2234,7 +2777,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2234 2777
2235 prepend(&cwd, &buflen, "\0", 1); 2778 prepend(&cwd, &buflen, "\0", 1);
2236 error = prepend_path(&pwd, &tmp, &cwd, &buflen); 2779 error = prepend_path(&pwd, &tmp, &cwd, &buflen);
2237 spin_unlock(&dcache_lock); 2780 write_sequnlock(&rename_lock);
2238 2781
2239 if (error) 2782 if (error)
2240 goto out; 2783 goto out;
@@ -2253,8 +2796,9 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
2253 if (copy_to_user(buf, cwd, len)) 2796 if (copy_to_user(buf, cwd, len))
2254 error = -EFAULT; 2797 error = -EFAULT;
2255 } 2798 }
2256 } else 2799 } else {
2257 spin_unlock(&dcache_lock); 2800 write_sequnlock(&rename_lock);
2801 }
2258 2802
2259out: 2803out:
2260 path_put(&pwd); 2804 path_put(&pwd);
@@ -2282,25 +2826,25 @@ out:
2282int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) 2826int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2283{ 2827{
2284 int result; 2828 int result;
2285 unsigned long seq; 2829 unsigned seq;
2286 2830
2287 if (new_dentry == old_dentry) 2831 if (new_dentry == old_dentry)
2288 return 1; 2832 return 1;
2289 2833
2290 /*
 2291 * Need rcu_read_lock to protect against the d_parent trashing
2292 * due to d_move
2293 */
2294 rcu_read_lock();
2295 do { 2834 do {
2296 /* for restarting inner loop in case of seq retry */ 2835 /* for restarting inner loop in case of seq retry */
2297 seq = read_seqbegin(&rename_lock); 2836 seq = read_seqbegin(&rename_lock);
2837 /*
 2838 * Need rcu_read_lock to protect against the d_parent trashing
2839 * due to d_move
2840 */
2841 rcu_read_lock();
2298 if (d_ancestor(old_dentry, new_dentry)) 2842 if (d_ancestor(old_dentry, new_dentry))
2299 result = 1; 2843 result = 1;
2300 else 2844 else
2301 result = 0; 2845 result = 0;
2846 rcu_read_unlock();
2302 } while (read_seqretry(&rename_lock, seq)); 2847 } while (read_seqretry(&rename_lock, seq));
2303 rcu_read_unlock();
2304 2848
2305 return result; 2849 return result;
2306} 2850}
@@ -2332,10 +2876,15 @@ EXPORT_SYMBOL(path_is_under);
2332 2876
2333void d_genocide(struct dentry *root) 2877void d_genocide(struct dentry *root)
2334{ 2878{
2335 struct dentry *this_parent = root; 2879 struct dentry *this_parent;
2336 struct list_head *next; 2880 struct list_head *next;
2881 unsigned seq;
2882 int locked = 0;
2337 2883
2338 spin_lock(&dcache_lock); 2884 seq = read_seqbegin(&rename_lock);
2885again:
2886 this_parent = root;
2887 spin_lock(&this_parent->d_lock);
2339repeat: 2888repeat:
2340 next = this_parent->d_subdirs.next; 2889 next = this_parent->d_subdirs.next;
2341resume: 2890resume:
@@ -2343,21 +2892,62 @@ resume:
2343 struct list_head *tmp = next; 2892 struct list_head *tmp = next;
2344 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 2893 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
2345 next = tmp->next; 2894 next = tmp->next;
2346 if (d_unhashed(dentry)||!dentry->d_inode) 2895
2896 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
2897 if (d_unhashed(dentry) || !dentry->d_inode) {
2898 spin_unlock(&dentry->d_lock);
2347 continue; 2899 continue;
2900 }
2348 if (!list_empty(&dentry->d_subdirs)) { 2901 if (!list_empty(&dentry->d_subdirs)) {
2902 spin_unlock(&this_parent->d_lock);
2903 spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
2349 this_parent = dentry; 2904 this_parent = dentry;
2905 spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
2350 goto repeat; 2906 goto repeat;
2351 } 2907 }
2352 atomic_dec(&dentry->d_count); 2908 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
2909 dentry->d_flags |= DCACHE_GENOCIDE;
2910 dentry->d_count--;
2911 }
2912 spin_unlock(&dentry->d_lock);
2353 } 2913 }
2354 if (this_parent != root) { 2914 if (this_parent != root) {
2355 next = this_parent->d_u.d_child.next; 2915 struct dentry *tmp;
2356 atomic_dec(&this_parent->d_count); 2916 struct dentry *child;
2357 this_parent = this_parent->d_parent; 2917
2918 tmp = this_parent->d_parent;
2919 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2920 this_parent->d_flags |= DCACHE_GENOCIDE;
2921 this_parent->d_count--;
2922 }
2923 rcu_read_lock();
2924 spin_unlock(&this_parent->d_lock);
2925 child = this_parent;
2926 this_parent = tmp;
2927 spin_lock(&this_parent->d_lock);
2928 /* might go back up the wrong parent if we have had a rename
2929 * or deletion */
2930 if (this_parent != child->d_parent ||
2931 (!locked && read_seqretry(&rename_lock, seq))) {
2932 spin_unlock(&this_parent->d_lock);
2933 rcu_read_unlock();
2934 goto rename_retry;
2935 }
2936 rcu_read_unlock();
2937 next = child->d_u.d_child.next;
2358 goto resume; 2938 goto resume;
2359 } 2939 }
2360 spin_unlock(&dcache_lock); 2940 spin_unlock(&this_parent->d_lock);
2941 if (!locked && read_seqretry(&rename_lock, seq))
2942 goto rename_retry;
2943 if (locked)
2944 write_sequnlock(&rename_lock);
2945 return;
2946
2947rename_retry:
2948 locked = 1;
2949 write_seqlock(&rename_lock);
2950 goto again;
2361} 2951}
2362 2952
2363/** 2953/**
@@ -2411,7 +3001,7 @@ static void __init dcache_init_early(void)
2411 3001
2412 dentry_hashtable = 3002 dentry_hashtable =
2413 alloc_large_system_hash("Dentry cache", 3003 alloc_large_system_hash("Dentry cache",
2414 sizeof(struct hlist_head), 3004 sizeof(struct dcache_hash_bucket),
2415 dhash_entries, 3005 dhash_entries,
2416 13, 3006 13,
2417 HASH_EARLY, 3007 HASH_EARLY,
@@ -2420,16 +3010,13 @@ static void __init dcache_init_early(void)
2420 0); 3010 0);
2421 3011
2422 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3012 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2423 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3013 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2424} 3014}
2425 3015
2426static void __init dcache_init(void) 3016static void __init dcache_init(void)
2427{ 3017{
2428 int loop; 3018 int loop;
2429 3019
2430 percpu_counter_init(&nr_dentry, 0);
2431 percpu_counter_init(&nr_dentry_unused, 0);
2432
2433 /* 3020 /*
2434 * A constructor could be added for stable state like the lists, 3021 * A constructor could be added for stable state like the lists,
2435 * but it is probably not worth it because of the cache nature 3022 * but it is probably not worth it because of the cache nature
@@ -2446,7 +3033,7 @@ static void __init dcache_init(void)
2446 3033
2447 dentry_hashtable = 3034 dentry_hashtable =
2448 alloc_large_system_hash("Dentry cache", 3035 alloc_large_system_hash("Dentry cache",
2449 sizeof(struct hlist_head), 3036 sizeof(struct dcache_hash_bucket),
2450 dhash_entries, 3037 dhash_entries,
2451 13, 3038 13,
2452 0, 3039 0,
@@ -2455,7 +3042,7 @@ static void __init dcache_init(void)
2455 0); 3042 0);
2456 3043
2457 for (loop = 0; loop < (1 << d_hash_shift); loop++) 3044 for (loop = 0; loop < (1 << d_hash_shift); loop++)
2458 INIT_HLIST_HEAD(&dentry_hashtable[loop]); 3045 INIT_HLIST_BL_HEAD(&dentry_hashtable[loop].head);
2459} 3046}
2460 3047
2461/* SLAB cache for __getname() consumers */ 3048/* SLAB cache for __getname() consumers */
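
A minimal sketch of the read-side retry idiom the fs/dcache.c hunks above build on; this is not code from the patch itself. It is written as if it lived inside fs/dcache.c, where rename_lock, IS_ROOT() and the dcache headers are already in scope, and count_ancestors() is an invented name. Readers sample rename_lock with read_seqbegin(), walk d_parent pointers under rcu_read_lock(), and retry if a concurrent d_move() bumped the sequence count, which is the same shape as the reworked is_subdir() above.

/* Sketch only: count how deep a dentry sits, tolerating concurrent renames. */
static int count_ancestors(struct dentry *dentry)
{
	struct dentry *d;
	unsigned seq;
	int depth;

	do {
		depth = 0;
		seq = read_seqbegin(&rename_lock);
		rcu_read_lock();
		for (d = dentry; !IS_ROOT(d); d = d->d_parent)
			depth++;
		rcu_read_unlock();
		/* a rename raced with the walk: discard the result and retry */
	} while (read_seqretry(&rename_lock, seq));

	return depth;
}

The writer side (d_move() and the locked fallback paths in the hunks above) takes write_seqlock(&rename_lock) where it previously took the global dcache_lock.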
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 906e803f7f79..6fc4f319b550 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -44,12 +44,17 @@
44 */ 44 */
45static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 45static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
46{ 46{
47 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save; 49 struct dentry *dentry_save;
50 struct vfsmount *vfsmount_save; 50 struct vfsmount *vfsmount_save;
51 int rc = 1; 51 int rc = 1;
52 52
53 if (nd->flags & LOOKUP_RCU)
54 return -ECHILD;
55
56 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
53 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
54 goto out; 59 goto out;
55 dentry_save = nd->path.dentry; 60 dentry_save = nd->path.dentry;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 9d1a22d62765..337352a94751 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -260,7 +260,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
260 ecryptfs_dentry->d_parent)); 260 ecryptfs_dentry->d_parent));
261 lower_inode = lower_dentry->d_inode; 261 lower_inode = lower_dentry->d_inode;
262 fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); 262 fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode);
263 BUG_ON(!atomic_read(&lower_dentry->d_count)); 263 BUG_ON(!lower_dentry->d_count);
264 ecryptfs_set_dentry_private(ecryptfs_dentry, 264 ecryptfs_set_dentry_private(ecryptfs_dentry,
265 kmem_cache_alloc(ecryptfs_dentry_info_cache, 265 kmem_cache_alloc(ecryptfs_dentry_info_cache,
266 GFP_KERNEL)); 266 GFP_KERNEL));
@@ -441,7 +441,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
441 struct qstr lower_name; 441 struct qstr lower_name;
442 int rc = 0; 442 int rc = 0;
443 443
444 ecryptfs_dentry->d_op = &ecryptfs_dops; 444 d_set_d_op(ecryptfs_dentry, &ecryptfs_dops);
445 if ((ecryptfs_dentry->d_name.len == 1 445 if ((ecryptfs_dentry->d_name.len == 1
446 && !strcmp(ecryptfs_dentry->d_name.name, ".")) 446 && !strcmp(ecryptfs_dentry->d_name.name, "."))
447 || (ecryptfs_dentry->d_name.len == 2 447 || (ecryptfs_dentry->d_name.len == 2
@@ -454,7 +454,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
454 lower_name.hash = ecryptfs_dentry->d_name.hash; 454 lower_name.hash = ecryptfs_dentry->d_name.hash;
455 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 455 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
456 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 456 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
457 &lower_name); 457 lower_dir_dentry->d_inode, &lower_name);
458 if (rc < 0) 458 if (rc < 0)
459 goto out_d_drop; 459 goto out_d_drop;
460 } 460 }
@@ -489,7 +489,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
489 lower_name.hash = full_name_hash(lower_name.name, lower_name.len); 489 lower_name.hash = full_name_hash(lower_name.name, lower_name.len);
490 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) { 490 if (lower_dir_dentry->d_op && lower_dir_dentry->d_op->d_hash) {
491 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry, 491 rc = lower_dir_dentry->d_op->d_hash(lower_dir_dentry,
492 &lower_name); 492 lower_dir_dentry->d_inode, &lower_name);
493 if (rc < 0) 493 if (rc < 0)
494 goto out_d_drop; 494 goto out_d_drop;
495 } 495 }
@@ -980,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
980} 980}
981 981
982static int 982static int
983ecryptfs_permission(struct inode *inode, int mask) 983ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
984{ 984{
985 if (flags & IPERM_FLAG_RCU)
986 return -ECHILD;
985 return inode_permission(ecryptfs_inode_to_lower(inode), mask); 987 return inode_permission(ecryptfs_inode_to_lower(inode), mask);
986} 988}
987 989
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index a9dbd62518e6..351038675376 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
189 if (special_file(lower_inode->i_mode)) 189 if (special_file(lower_inode->i_mode))
190 init_special_inode(inode, lower_inode->i_mode, 190 init_special_inode(inode, lower_inode->i_mode,
191 lower_inode->i_rdev); 191 lower_inode->i_rdev);
192 dentry->d_op = &ecryptfs_dops; 192 d_set_d_op(dentry, &ecryptfs_dops);
193 fsstack_copy_attr_all(inode, lower_inode); 193 fsstack_copy_attr_all(inode, lower_inode);
194 /* This size will be overwritten for real files w/ headers and 194 /* This size will be overwritten for real files w/ headers and
195 * other metadata */ 195 * other metadata */
@@ -594,7 +594,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
594 deactivate_locked_super(s); 594 deactivate_locked_super(s);
595 goto out; 595 goto out;
596 } 596 }
597 s->s_root->d_op = &ecryptfs_dops; 597 d_set_d_op(s->s_root, &ecryptfs_dops);
598 s->s_root->d_sb = s; 598 s->s_root->d_sb = s;
599 s->s_root->d_parent = s->s_root; 599 s->s_root->d_parent = s->s_root;
600 600
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 253732382d37..3042fe123a34 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -28,7 +28,6 @@
28#include <linux/key.h> 28#include <linux/key.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/seq_file.h> 30#include <linux/seq_file.h>
31#include <linux/smp_lock.h>
32#include <linux/file.h> 31#include <linux/file.h>
33#include <linux/crypto.h> 32#include <linux/crypto.h>
34#include "ecryptfs_kernel.h" 33#include "ecryptfs_kernel.h"
@@ -63,6 +62,16 @@ out:
63 return inode; 62 return inode;
64} 63}
65 64
65static void ecryptfs_i_callback(struct rcu_head *head)
66{
67 struct inode *inode = container_of(head, struct inode, i_rcu);
68 struct ecryptfs_inode_info *inode_info;
69 inode_info = ecryptfs_inode_to_private(inode);
70
71 INIT_LIST_HEAD(&inode->i_dentry);
72 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
73}
74
66/** 75/**
67 * ecryptfs_destroy_inode 76 * ecryptfs_destroy_inode
68 * @inode: The ecryptfs inode 77 * @inode: The ecryptfs inode
@@ -89,7 +98,7 @@ static void ecryptfs_destroy_inode(struct inode *inode)
89 } 98 }
90 } 99 }
91 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat); 100 ecryptfs_destroy_crypt_stat(&inode_info->crypt_stat);
92 kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 101 call_rcu(&inode->i_rcu, ecryptfs_i_callback);
93} 102}
94 103
95/** 104/**
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 5073a07652cc..0f31acb0131c 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -65,11 +65,18 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
65 return &ei->vfs_inode; 65 return &ei->vfs_inode;
66} 66}
67 67
68static void efs_destroy_inode(struct inode *inode) 68static void efs_i_callback(struct rcu_head *head)
69{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); 72 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
71} 73}
72 74
75static void efs_destroy_inode(struct inode *inode)
76{
77 call_rcu(&inode->i_rcu, efs_i_callback);
78}
79
73static void init_once(void *foo) 80static void init_once(void *foo)
74{ 81{
75 struct efs_inode_info *ei = (struct efs_inode_info *) foo; 82 struct efs_inode_info *ei = (struct efs_inode_info *) foo;
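
The efs conversion above has the same shape that ecryptfs earlier in this diff and exofs, ext2, ext3, ext4 and fat below receive: ->destroy_inode() stops freeing the inode directly and instead defers the kmem_cache_free() through call_rcu(), so lock-free (RCU-walk) lookups that still hold a pointer to the inode never see freed memory, and each callback re-initialises i_dentry before handing the object back to the slab. A hedged sketch of the pattern follows; example_inode_cachep, EXAMPLE_I(), struct example_inode_info and the function names are invented placeholders, not symbols from the patch.

/* Sketch of the per-filesystem RCU-deferred inode free used in this series. */
struct example_inode_info {
	/* filesystem-private fields would live here */
	struct inode vfs_inode;
};

static struct kmem_cache *example_inode_cachep;	/* placeholder slab cache */

#define EXAMPLE_I(inode) container_of(inode, struct example_inode_info, vfs_inode)

static void example_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	/* mirror the hunks above: reset i_dentry before the slab reuses the object */
	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(example_inode_cachep, EXAMPLE_I(inode));
}

static void example_destroy_inode(struct inode *inode)
{
	/* defer the actual free until after an RCU grace period */
	call_rcu(&inode->i_rcu, example_i_callback);
}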
diff --git a/fs/exec.c b/fs/exec.c
index 99d33a1371e9..c62efcb959c7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -164,7 +164,26 @@ out:
164 164
165#ifdef CONFIG_MMU 165#ifdef CONFIG_MMU
166 166
167static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 167void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
168{
169 struct mm_struct *mm = current->mm;
170 long diff = (long)(pages - bprm->vma_pages);
171
172 if (!mm || !diff)
173 return;
174
175 bprm->vma_pages = pages;
176
177#ifdef SPLIT_RSS_COUNTING
178 add_mm_counter(mm, MM_ANONPAGES, diff);
179#else
180 spin_lock(&mm->page_table_lock);
181 add_mm_counter(mm, MM_ANONPAGES, diff);
182 spin_unlock(&mm->page_table_lock);
183#endif
184}
185
186struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
168 int write) 187 int write)
169{ 188{
170 struct page *page; 189 struct page *page;
@@ -186,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
186 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; 205 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
187 struct rlimit *rlim; 206 struct rlimit *rlim;
188 207
208 acct_arg_size(bprm, size / PAGE_SIZE);
209
189 /* 210 /*
190 * We've historically supported up to 32 pages (ARG_MAX) 211 * We've historically supported up to 32 pages (ARG_MAX)
191 * of argument strings even with small stacks 212 * of argument strings even with small stacks
@@ -254,6 +275,11 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
254 vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; 275 vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
255 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); 276 vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
256 INIT_LIST_HEAD(&vma->anon_vma_chain); 277 INIT_LIST_HEAD(&vma->anon_vma_chain);
278
279 err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
280 if (err)
281 goto err;
282
257 err = insert_vm_struct(mm, vma); 283 err = insert_vm_struct(mm, vma);
258 if (err) 284 if (err)
259 goto err; 285 goto err;
@@ -276,7 +302,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
276 302
277#else 303#else
278 304
279static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, 305void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
306{
307}
308
309struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
280 int write) 310 int write)
281{ 311{
282 struct page *page; 312 struct page *page;
@@ -1003,6 +1033,7 @@ int flush_old_exec(struct linux_binprm * bprm)
1003 /* 1033 /*
1004 * Release all of the old mmap stuff 1034 * Release all of the old mmap stuff
1005 */ 1035 */
1036 acct_arg_size(bprm, 0);
1006 retval = exec_mmap(bprm->mm); 1037 retval = exec_mmap(bprm->mm);
1007 if (retval) 1038 if (retval)
1008 goto out; 1039 goto out;
@@ -1426,8 +1457,10 @@ int do_execve(const char * filename,
1426 return retval; 1457 return retval;
1427 1458
1428out: 1459out:
1429 if (bprm->mm) 1460 if (bprm->mm) {
1430 mmput (bprm->mm); 1461 acct_arg_size(bprm, 0);
1462 mmput(bprm->mm);
1463 }
1431 1464
1432out_file: 1465out_file:
1433 if (bprm->file) { 1466 if (bprm->file) {
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 79c3ae6e0456..8c6c4669b381 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -150,12 +150,19 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
150 return &oi->vfs_inode; 150 return &oi->vfs_inode;
151} 151}
152 152
153static void exofs_i_callback(struct rcu_head *head)
154{
155 struct inode *inode = container_of(head, struct inode, i_rcu);
156 INIT_LIST_HEAD(&inode->i_dentry);
157 kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
158}
159
153/* 160/*
154 * Remove an inode from the cache 161 * Remove an inode from the cache
155 */ 162 */
156static void exofs_destroy_inode(struct inode *inode) 163static void exofs_destroy_inode(struct inode *inode)
157{ 164{
158 kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); 165 call_rcu(&inode->i_rcu, exofs_i_callback);
159} 166}
160 167
161/* 168/*
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 51b304056f10..4b6825740dd5 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -43,24 +43,26 @@ find_acceptable_alias(struct dentry *result,
43 void *context) 43 void *context)
44{ 44{
45 struct dentry *dentry, *toput = NULL; 45 struct dentry *dentry, *toput = NULL;
46 struct inode *inode;
46 47
47 if (acceptable(context, result)) 48 if (acceptable(context, result))
48 return result; 49 return result;
49 50
50 spin_lock(&dcache_lock); 51 inode = result->d_inode;
51 list_for_each_entry(dentry, &result->d_inode->i_dentry, d_alias) { 52 spin_lock(&inode->i_lock);
52 dget_locked(dentry); 53 list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
53 spin_unlock(&dcache_lock); 54 dget(dentry);
55 spin_unlock(&inode->i_lock);
54 if (toput) 56 if (toput)
55 dput(toput); 57 dput(toput);
56 if (dentry != result && acceptable(context, dentry)) { 58 if (dentry != result && acceptable(context, dentry)) {
57 dput(result); 59 dput(result);
58 return dentry; 60 return dentry;
59 } 61 }
60 spin_lock(&dcache_lock); 62 spin_lock(&inode->i_lock);
61 toput = dentry; 63 toput = dentry;
62 } 64 }
63 spin_unlock(&dcache_lock); 65 spin_unlock(&inode->i_lock);
64 66
65 if (toput) 67 if (toput)
66 dput(toput); 68 dput(toput);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 2bcc0431bada..7b4180554a62 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,10 +232,17 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
232} 232}
233 233
234int 234int
235ext2_check_acl(struct inode *inode, int mask) 235ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
236{ 236{
237 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); 237 struct posix_acl *acl;
238
239 if (flags & IPERM_FLAG_RCU) {
240 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
241 return -ECHILD;
242 return -EAGAIN;
243 }
238 244
245 acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
239 if (IS_ERR(acl)) 246 if (IS_ERR(acl))
240 return PTR_ERR(acl); 247 return PTR_ERR(acl);
241 if (acl) { 248 if (acl) {
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 3ff6cbb9ac44..c939b7b12099 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_check_acl (struct inode *, int); 57extern int ext2_check_acl (struct inode *, int, unsigned int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index d89e0b6a2d78..e0c6380ff992 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -161,11 +161,18 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
161 return &ei->vfs_inode; 161 return &ei->vfs_inode;
162} 162}
163 163
164static void ext2_destroy_inode(struct inode *inode) 164static void ext2_i_callback(struct rcu_head *head)
165{ 165{
166 struct inode *inode = container_of(head, struct inode, i_rcu);
167 INIT_LIST_HEAD(&inode->i_dentry);
166 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); 168 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
167} 169}
168 170
171static void ext2_destroy_inode(struct inode *inode)
172{
173 call_rcu(&inode->i_rcu, ext2_i_callback);
174}
175
169static void init_once(void *foo) 176static void init_once(void *foo)
170{ 177{
171 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; 178 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 8a11fe212183..e4fa49e6c539 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,10 +240,17 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
240} 240}
241 241
242int 242int
243ext3_check_acl(struct inode *inode, int mask) 243ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
244{ 244{
245 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); 245 struct posix_acl *acl;
246
247 if (flags & IPERM_FLAG_RCU) {
248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
249 return -ECHILD;
250 return -EAGAIN;
251 }
246 252
253 acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
247 if (IS_ERR(acl)) 254 if (IS_ERR(acl))
248 return PTR_ERR(acl); 255 return PTR_ERR(acl);
249 if (acl) { 256 if (acl) {
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 597334626de9..5faf8048e906 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_check_acl (struct inode *, int); 57extern int ext3_check_acl (struct inode *, int, unsigned int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2fedaf8b5012..77ce1616f725 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -27,7 +27,6 @@
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/blkdev.h> 28#include <linux/blkdev.h>
29#include <linux/parser.h> 29#include <linux/parser.h>
30#include <linux/smp_lock.h>
31#include <linux/buffer_head.h> 30#include <linux/buffer_head.h>
32#include <linux/exportfs.h> 31#include <linux/exportfs.h>
33#include <linux/vfs.h> 32#include <linux/vfs.h>
@@ -480,6 +479,13 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
480 return &ei->vfs_inode; 479 return &ei->vfs_inode;
481} 480}
482 481
482static void ext3_i_callback(struct rcu_head *head)
483{
484 struct inode *inode = container_of(head, struct inode, i_rcu);
485 INIT_LIST_HEAD(&inode->i_dentry);
486 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
487}
488
483static void ext3_destroy_inode(struct inode *inode) 489static void ext3_destroy_inode(struct inode *inode)
484{ 490{
485 if (!list_empty(&(EXT3_I(inode)->i_orphan))) { 491 if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
@@ -490,7 +496,7 @@ static void ext3_destroy_inode(struct inode *inode)
490 false); 496 false);
491 dump_stack(); 497 dump_stack();
492 } 498 }
493 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 499 call_rcu(&inode->i_rcu, ext3_i_callback);
494} 500}
495 501
496static void init_once(void *foo) 502static void init_once(void *foo)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 5e2ed4504ead..e0270d1f8d82 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,10 +238,17 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
238} 238}
239 239
240int 240int
241ext4_check_acl(struct inode *inode, int mask) 241ext4_check_acl(struct inode *inode, int mask, unsigned int flags)
242{ 242{
243 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); 243 struct posix_acl *acl;
244
245 if (flags & IPERM_FLAG_RCU) {
246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
247 return -ECHILD;
248 return -EAGAIN;
249 }
244 250
251 acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
245 if (IS_ERR(acl)) 252 if (IS_ERR(acl))
246 return PTR_ERR(acl); 253 return PTR_ERR(acl);
247 if (acl) { 254 if (acl) {
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 9d843d5deac4..dec821168fd4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_check_acl(struct inode *, int); 57extern int ext4_check_acl(struct inode *, int, unsigned int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
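
The ext2, ext3 and ext4 ->check_acl() changes above all follow one rule: when called with IPERM_FLAG_RCU the helper runs inside an RCU-walk path lookup and must not block, so it may only proceed when the ACL cache already says the inode has no ACL, and otherwise returns -ECHILD so the caller retries in ref-walk mode. A sketch of that shape, assuming the same negative_cached_acl() helper the hunks use; example_check_acl() and example_get_acl() are invented names, not patch symbols.

/* Placeholder for the filesystem-specific ACL reader (may block). */
static struct posix_acl *example_get_acl(struct inode *inode, int type);

static int example_check_acl(struct inode *inode, int mask, unsigned int flags)
{
	struct posix_acl *acl;
	int error;

	if (flags & IPERM_FLAG_RCU) {
		/* RCU-walk: no blocking allowed, answer from the cache or punt */
		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
			return -ECHILD;	/* no negative cache entry: drop to ref-walk */
		return -EAGAIN;		/* cached "no ACL": ordinary mode bits decide */
	}

	acl = example_get_acl(inode, ACL_TYPE_ACCESS);	/* may read from disk */
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (acl) {
		error = posix_acl_permission(inode, acl, mask);
		posix_acl_release(acl);
		return error;
	}
	return -EAGAIN;
}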
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8b5dd6369f82..94ce3d7a1c4b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,7 +177,7 @@ struct mpage_da_data {
177 177
178struct ext4_io_page { 178struct ext4_io_page {
179 struct page *p_page; 179 struct page *p_page;
180 int p_count; 180 atomic_t p_count;
181}; 181};
182 182
183#define MAX_IO_PAGES 128 183#define MAX_IO_PAGES 128
@@ -858,6 +858,7 @@ struct ext4_inode_info {
858 spinlock_t i_completed_io_lock; 858 spinlock_t i_completed_io_lock;
859 /* current io_end structure for async DIO write*/ 859 /* current io_end structure for async DIO write*/
860 ext4_io_end_t *cur_aio_dio; 860 ext4_io_end_t *cur_aio_dio;
861 atomic_t i_ioend_count; /* Number of outstanding io_end structs */
861 862
862 /* 863 /*
863 * Transactions that contain inode's metadata needed to complete 864 * Transactions that contain inode's metadata needed to complete
@@ -909,6 +910,7 @@ struct ext4_inode_info {
909#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ 910#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
910#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 911#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
911#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 912#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
913#define EXT4_MOUNT_MBLK_IO_SUBMIT 0x4000000 /* multi-block io submits */
912#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 914#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
913#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 915#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
914#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 916#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
@@ -2060,6 +2062,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2060/* page-io.c */ 2062/* page-io.c */
2061extern int __init ext4_init_pageio(void); 2063extern int __init ext4_init_pageio(void);
2062extern void ext4_exit_pageio(void); 2064extern void ext4_exit_pageio(void);
2065extern void ext4_ioend_wait(struct inode *);
2063extern void ext4_free_io_end(ext4_io_end_t *io); 2066extern void ext4_free_io_end(ext4_io_end_t *io);
2064extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2067extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2065extern int ext4_end_io_nolock(ext4_io_end_t *io); 2068extern int ext4_end_io_nolock(ext4_io_end_t *io);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4d78342f3bf0..e659597b690b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -53,6 +53,7 @@
53static inline int ext4_begin_ordered_truncate(struct inode *inode, 53static inline int ext4_begin_ordered_truncate(struct inode *inode,
54 loff_t new_size) 54 loff_t new_size)
55{ 55{
56 trace_ext4_begin_ordered_truncate(inode, new_size);
56 return jbd2_journal_begin_ordered_truncate( 57 return jbd2_journal_begin_ordered_truncate(
57 EXT4_SB(inode->i_sb)->s_journal, 58 EXT4_SB(inode->i_sb)->s_journal,
58 &EXT4_I(inode)->jinode, 59 &EXT4_I(inode)->jinode,
@@ -178,6 +179,7 @@ void ext4_evict_inode(struct inode *inode)
178 handle_t *handle; 179 handle_t *handle;
179 int err; 180 int err;
180 181
182 trace_ext4_evict_inode(inode);
181 if (inode->i_nlink) { 183 if (inode->i_nlink) {
182 truncate_inode_pages(&inode->i_data, 0); 184 truncate_inode_pages(&inode->i_data, 0);
183 goto no_delete; 185 goto no_delete;
@@ -2123,9 +2125,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
2123 */ 2125 */
2124 if (unlikely(journal_data && PageChecked(page))) 2126 if (unlikely(journal_data && PageChecked(page)))
2125 err = __ext4_journalled_writepage(page, len); 2127 err = __ext4_journalled_writepage(page, len);
2126 else 2128 else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
2127 err = ext4_bio_write_page(&io_submit, page, 2129 err = ext4_bio_write_page(&io_submit, page,
2128 len, mpd->wbc); 2130 len, mpd->wbc);
2131 else
2132 err = block_write_full_page(page,
2133 noalloc_get_block_write, mpd->wbc);
2129 2134
2130 if (!err) 2135 if (!err)
2131 mpd->pages_written++; 2136 mpd->pages_written++;
@@ -5647,6 +5652,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5647 int err, ret; 5652 int err, ret;
5648 5653
5649 might_sleep(); 5654 might_sleep();
5655 trace_ext4_mark_inode_dirty(inode, _RET_IP_);
5650 err = ext4_reserve_inode_write(handle, inode, &iloc); 5656 err = ext4_reserve_inode_write(handle, inode, &iloc);
5651 if (ext4_handle_valid(handle) && 5657 if (ext4_handle_valid(handle) &&
5652 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && 5658 EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bf5ae883b1bd..eb3bc2fe647e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,6 +331,30 @@ mext_out:
331 return err; 331 return err;
332 } 332 }
333 333
334 case FITRIM:
335 {
336 struct super_block *sb = inode->i_sb;
337 struct fstrim_range range;
338 int ret = 0;
339
340 if (!capable(CAP_SYS_ADMIN))
341 return -EPERM;
342
343 if (copy_from_user(&range, (struct fstrim_range *)arg,
344 sizeof(range)))
345 return -EFAULT;
346
347 ret = ext4_trim_fs(sb, &range);
348 if (ret < 0)
349 return ret;
350
351 if (copy_to_user((struct fstrim_range *)arg, &range,
352 sizeof(range)))
353 return -EFAULT;
354
355 return 0;
356 }
357
334 default: 358 default:
335 return -ENOTTY; 359 return -ENOTTY;
336 } 360 }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c58eba34724a..5b4d4e3a4d58 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4640,8 +4640,6 @@ do_more:
4640 * with group lock held. generate_buddy look at 4640 * with group lock held. generate_buddy look at
4641 * them with group lock_held 4641 * them with group lock_held
4642 */ 4642 */
4643 if (test_opt(sb, DISCARD))
4644 ext4_issue_discard(sb, block_group, bit, count);
4645 ext4_lock_group(sb, block_group); 4643 ext4_lock_group(sb, block_group);
4646 mb_clear_bits(bitmap_bh->b_data, bit, count); 4644 mb_clear_bits(bitmap_bh->b_data, bit, count);
4647 mb_free_blocks(inode, &e4b, bit, count); 4645 mb_free_blocks(inode, &e4b, bit, count);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 92203b8a099f..dc40e75cba88 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -872,7 +872,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
872 if (namelen > EXT4_NAME_LEN) 872 if (namelen > EXT4_NAME_LEN)
873 return NULL; 873 return NULL;
874 if ((namelen <= 2) && (name[0] == '.') && 874 if ((namelen <= 2) && (name[0] == '.') &&
875 (name[1] == '.' || name[1] == '0')) { 875 (name[1] == '.' || name[1] == '\0')) {
876 /* 876 /*
877 * "." or ".." will only be in the first block 877 * "." or ".." will only be in the first block
878 * NFS may look up ".."; "." should be handled by the VFS 878 * NFS may look up ".."; "." should be handled by the VFS
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 46a7d6a9d976..beacce11ac50 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,8 +32,14 @@
32 32
33static struct kmem_cache *io_page_cachep, *io_end_cachep; 33static struct kmem_cache *io_page_cachep, *io_end_cachep;
34 34
35#define WQ_HASH_SZ 37
36#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
37static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
38
35int __init ext4_init_pageio(void) 39int __init ext4_init_pageio(void)
36{ 40{
41 int i;
42
37 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT); 43 io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
38 if (io_page_cachep == NULL) 44 if (io_page_cachep == NULL)
39 return -ENOMEM; 45 return -ENOMEM;
@@ -42,6 +48,8 @@ int __init ext4_init_pageio(void)
42 kmem_cache_destroy(io_page_cachep); 48 kmem_cache_destroy(io_page_cachep);
43 return -ENOMEM; 49 return -ENOMEM;
44 } 50 }
51 for (i = 0; i < WQ_HASH_SZ; i++)
52 init_waitqueue_head(&ioend_wq[i]);
45 53
46 return 0; 54 return 0;
47} 55}
@@ -52,24 +60,37 @@ void ext4_exit_pageio(void)
52 kmem_cache_destroy(io_page_cachep); 60 kmem_cache_destroy(io_page_cachep);
53} 61}
54 62
63void ext4_ioend_wait(struct inode *inode)
64{
65 wait_queue_head_t *wq = to_ioend_wq(inode);
66
67 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
68}
69
70static void put_io_page(struct ext4_io_page *io_page)
71{
72 if (atomic_dec_and_test(&io_page->p_count)) {
73 end_page_writeback(io_page->p_page);
74 put_page(io_page->p_page);
75 kmem_cache_free(io_page_cachep, io_page);
76 }
77}
78
55void ext4_free_io_end(ext4_io_end_t *io) 79void ext4_free_io_end(ext4_io_end_t *io)
56{ 80{
57 int i; 81 int i;
82 wait_queue_head_t *wq;
58 83
59 BUG_ON(!io); 84 BUG_ON(!io);
60 if (io->page) 85 if (io->page)
61 put_page(io->page); 86 put_page(io->page);
62 for (i = 0; i < io->num_io_pages; i++) { 87 for (i = 0; i < io->num_io_pages; i++)
63 if (--io->pages[i]->p_count == 0) { 88 put_io_page(io->pages[i]);
64 struct page *page = io->pages[i]->p_page;
65
66 end_page_writeback(page);
67 put_page(page);
68 kmem_cache_free(io_page_cachep, io->pages[i]);
69 }
70 }
71 io->num_io_pages = 0; 89 io->num_io_pages = 0;
72 iput(io->inode); 90 wq = to_ioend_wq(io->inode);
91 if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
92 waitqueue_active(wq))
93 wake_up_all(wq);
73 kmem_cache_free(io_end_cachep, io); 94 kmem_cache_free(io_end_cachep, io);
74} 95}
75 96
@@ -142,8 +163,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
142 io = kmem_cache_alloc(io_end_cachep, flags); 163 io = kmem_cache_alloc(io_end_cachep, flags);
143 if (io) { 164 if (io) {
144 memset(io, 0, sizeof(*io)); 165 memset(io, 0, sizeof(*io));
145 io->inode = igrab(inode); 166 atomic_inc(&EXT4_I(inode)->i_ioend_count);
146 BUG_ON(!io->inode); 167 io->inode = inode;
147 INIT_WORK(&io->work, ext4_end_io_work); 168 INIT_WORK(&io->work, ext4_end_io_work);
148 INIT_LIST_HEAD(&io->list); 169 INIT_LIST_HEAD(&io->list);
149 } 170 }
@@ -171,35 +192,15 @@ static void ext4_end_bio(struct bio *bio, int error)
171 struct workqueue_struct *wq; 192 struct workqueue_struct *wq;
172 struct inode *inode; 193 struct inode *inode;
173 unsigned long flags; 194 unsigned long flags;
174 ext4_fsblk_t err_block;
175 int i; 195 int i;
176 196
177 BUG_ON(!io_end); 197 BUG_ON(!io_end);
178 inode = io_end->inode;
179 bio->bi_private = NULL; 198 bio->bi_private = NULL;
180 bio->bi_end_io = NULL; 199 bio->bi_end_io = NULL;
181 if (test_bit(BIO_UPTODATE, &bio->bi_flags)) 200 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
182 error = 0; 201 error = 0;
183 err_block = bio->bi_sector >> (inode->i_blkbits - 9);
184 bio_put(bio); 202 bio_put(bio);
185 203
186 if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
187 pr_err("sb umounted, discard end_io request for inode %lu\n",
188 io_end->inode->i_ino);
189 ext4_free_io_end(io_end);
190 return;
191 }
192
193 if (error) {
194 io_end->flag |= EXT4_IO_END_ERROR;
195 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
196 "(offset %llu size %ld starting block %llu)",
197 inode->i_ino,
198 (unsigned long long) io_end->offset,
199 (long) io_end->size,
200 (unsigned long long) err_block);
201 }
202
203 for (i = 0; i < io_end->num_io_pages; i++) { 204 for (i = 0; i < io_end->num_io_pages; i++) {
204 struct page *page = io_end->pages[i]->p_page; 205 struct page *page = io_end->pages[i]->p_page;
205 struct buffer_head *bh, *head; 206 struct buffer_head *bh, *head;
@@ -236,14 +237,6 @@ static void ext4_end_bio(struct bio *bio, int error)
236 } while (bh != head); 237 } while (bh != head);
237 } 238 }
238 239
239 if (--io_end->pages[i]->p_count == 0) {
240 struct page *page = io_end->pages[i]->p_page;
241
242 end_page_writeback(page);
243 put_page(page);
244 kmem_cache_free(io_page_cachep, io_end->pages[i]);
245 }
246
247 /* 240 /*
248 * If this is a partial write which happened to make 241 * If this is a partial write which happened to make
249 * all buffers uptodate then we can optimize away a 242 * all buffers uptodate then we can optimize away a
@@ -253,9 +246,22 @@ static void ext4_end_bio(struct bio *bio, int error)
253 */ 246 */
254 if (!partial_write) 247 if (!partial_write)
255 SetPageUptodate(page); 248 SetPageUptodate(page);
256 }
257 249
250 put_io_page(io_end->pages[i]);
251 }
258 io_end->num_io_pages = 0; 252 io_end->num_io_pages = 0;
253 inode = io_end->inode;
254
255 if (error) {
256 io_end->flag |= EXT4_IO_END_ERROR;
257 ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
258 "(offset %llu size %ld starting block %llu)",
259 inode->i_ino,
260 (unsigned long long) io_end->offset,
261 (long) io_end->size,
262 (unsigned long long)
263 bio->bi_sector >> (inode->i_blkbits - 9));
264 }
259 265
260 /* Add the io_end to per-inode completed io list*/ 266 /* Add the io_end to per-inode completed io list*/
261 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); 267 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -305,7 +311,6 @@ static int io_submit_init(struct ext4_io_submit *io,
305 bio->bi_private = io->io_end = io_end; 311 bio->bi_private = io->io_end = io_end;
306 bio->bi_end_io = ext4_end_bio; 312 bio->bi_end_io = ext4_end_bio;
307 313
308 io_end->inode = inode;
309 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); 314 io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
310 315
311 io->io_bio = bio; 316 io->io_bio = bio;
@@ -360,7 +365,7 @@ submit_and_retry:
360 if ((io_end->num_io_pages == 0) || 365 if ((io_end->num_io_pages == 0) ||
361 (io_end->pages[io_end->num_io_pages-1] != io_page)) { 366 (io_end->pages[io_end->num_io_pages-1] != io_page)) {
362 io_end->pages[io_end->num_io_pages++] = io_page; 367 io_end->pages[io_end->num_io_pages++] = io_page;
363 io_page->p_count++; 368 atomic_inc(&io_page->p_count);
364 } 369 }
365 return 0; 370 return 0;
366} 371}
@@ -389,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
389 return -ENOMEM; 394 return -ENOMEM;
390 } 395 }
391 io_page->p_page = page; 396 io_page->p_page = page;
392 io_page->p_count = 0; 397 atomic_set(&io_page->p_count, 1);
393 get_page(page); 398 get_page(page);
394 399
395 for (bh = head = page_buffers(page), block_start = 0; 400 for (bh = head = page_buffers(page), block_start = 0;
@@ -421,10 +426,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
421 * PageWriteback bit from the page to prevent the system from 426 * PageWriteback bit from the page to prevent the system from
422 * wedging later on. 427 * wedging later on.
423 */ 428 */
424 if (io_page->p_count == 0) { 429 put_io_page(io_page);
425 put_page(page);
426 end_page_writeback(page);
427 kmem_cache_free(io_page_cachep, io_page);
428 }
429 return ret; 430 return ret;
430} 431}
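
Beyond switching p_count to an atomic_t, the page-io hunks above add a counting handshake so an inode cannot be torn down while async io_end work is still in flight: ext4_init_io_end() bumps EXT4_I(inode)->i_ioend_count, ext4_free_io_end() drops it and wakes a hashed wait queue, and ext4_destroy_inode() waits for the count to drain via ext4_ioend_wait(). A stripped-down sketch of that handshake with invented names (pending_io, pending_io_wq and the three helpers); it is not ext4 code.

static atomic_t pending_io = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(pending_io_wq);

static void example_io_start(void)
{
	atomic_inc(&pending_io);	/* one more outstanding completion */
}

static void example_io_done(void)
{
	/* the last completion wakes anyone waiting for the count to reach zero */
	if (atomic_dec_and_test(&pending_io) && waitqueue_active(&pending_io_wq))
		wake_up_all(&pending_io_wq);
}

static void example_io_wait(void)
{
	wait_event(pending_io_wq, atomic_read(&pending_io) == 0);
}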
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index dc963929de65..981c8477adab 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -232,6 +232,8 @@ static int setup_new_group_blocks(struct super_block *sb,
232 GFP_NOFS); 232 GFP_NOFS);
233 if (err) 233 if (err)
234 goto exit_bh; 234 goto exit_bh;
235 for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
236 ext4_set_bit(bit, bh->b_data);
235 237
236 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 238 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
237 input->block_bitmap - start); 239 input->block_bitmap - start);
@@ -247,6 +249,9 @@ static int setup_new_group_blocks(struct super_block *sb,
247 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 249 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
248 if (err) 250 if (err)
249 goto exit_bh; 251 goto exit_bh;
252 for (i = 0, bit = input->inode_table - start;
253 i < sbi->s_itb_per_group; i++, bit++)
254 ext4_set_bit(bit, bh->b_data);
250 255
251 if ((err = extend_or_restart_transaction(handle, 2, bh))) 256 if ((err = extend_or_restart_transaction(handle, 2, bh)))
252 goto exit_bh; 257 goto exit_bh;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 40131b777af6..cd37f9d5e447 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -828,12 +828,29 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
828 ei->cur_aio_dio = NULL; 828 ei->cur_aio_dio = NULL;
829 ei->i_sync_tid = 0; 829 ei->i_sync_tid = 0;
830 ei->i_datasync_tid = 0; 830 ei->i_datasync_tid = 0;
831 atomic_set(&ei->i_ioend_count, 0);
831 832
832 return &ei->vfs_inode; 833 return &ei->vfs_inode;
833} 834}
834 835
836static int ext4_drop_inode(struct inode *inode)
837{
838 int drop = generic_drop_inode(inode);
839
840 trace_ext4_drop_inode(inode, drop);
841 return drop;
842}
843
844static void ext4_i_callback(struct rcu_head *head)
845{
846 struct inode *inode = container_of(head, struct inode, i_rcu);
847 INIT_LIST_HEAD(&inode->i_dentry);
848 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
849}
850
835static void ext4_destroy_inode(struct inode *inode) 851static void ext4_destroy_inode(struct inode *inode)
836{ 852{
853 ext4_ioend_wait(inode);
837 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 854 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
838 ext4_msg(inode->i_sb, KERN_ERR, 855 ext4_msg(inode->i_sb, KERN_ERR,
839 "Inode %lu (%p): orphan list check failed!", 856 "Inode %lu (%p): orphan list check failed!",
@@ -843,7 +860,7 @@ static void ext4_destroy_inode(struct inode *inode)
843 true); 860 true);
844 dump_stack(); 861 dump_stack();
845 } 862 }
846 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 863 call_rcu(&inode->i_rcu, ext4_i_callback);
847} 864}
848 865
849static void init_once(void *foo) 866static void init_once(void *foo)
@@ -1016,6 +1033,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1016 !(def_mount_opts & EXT4_DEFM_NODELALLOC)) 1033 !(def_mount_opts & EXT4_DEFM_NODELALLOC))
1017 seq_puts(seq, ",nodelalloc"); 1034 seq_puts(seq, ",nodelalloc");
1018 1035
1036 if (test_opt(sb, MBLK_IO_SUBMIT))
1037 seq_puts(seq, ",mblk_io_submit");
1019 if (sbi->s_stripe) 1038 if (sbi->s_stripe)
1020 seq_printf(seq, ",stripe=%lu", sbi->s_stripe); 1039 seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
1021 /* 1040 /*
@@ -1173,6 +1192,7 @@ static const struct super_operations ext4_sops = {
1173 .destroy_inode = ext4_destroy_inode, 1192 .destroy_inode = ext4_destroy_inode,
1174 .write_inode = ext4_write_inode, 1193 .write_inode = ext4_write_inode,
1175 .dirty_inode = ext4_dirty_inode, 1194 .dirty_inode = ext4_dirty_inode,
1195 .drop_inode = ext4_drop_inode,
1176 .evict_inode = ext4_evict_inode, 1196 .evict_inode = ext4_evict_inode,
1177 .put_super = ext4_put_super, 1197 .put_super = ext4_put_super,
1178 .sync_fs = ext4_sync_fs, 1198 .sync_fs = ext4_sync_fs,
@@ -1186,7 +1206,6 @@ static const struct super_operations ext4_sops = {
1186 .quota_write = ext4_quota_write, 1206 .quota_write = ext4_quota_write,
1187#endif 1207#endif
1188 .bdev_try_to_free_page = bdev_try_to_free_page, 1208 .bdev_try_to_free_page = bdev_try_to_free_page,
1189 .trim_fs = ext4_trim_fs
1190}; 1209};
1191 1210
1192static const struct super_operations ext4_nojournal_sops = { 1211static const struct super_operations ext4_nojournal_sops = {
@@ -1194,6 +1213,7 @@ static const struct super_operations ext4_nojournal_sops = {
1194 .destroy_inode = ext4_destroy_inode, 1213 .destroy_inode = ext4_destroy_inode,
1195 .write_inode = ext4_write_inode, 1214 .write_inode = ext4_write_inode,
1196 .dirty_inode = ext4_dirty_inode, 1215 .dirty_inode = ext4_dirty_inode,
1216 .drop_inode = ext4_drop_inode,
1197 .evict_inode = ext4_evict_inode, 1217 .evict_inode = ext4_evict_inode,
1198 .write_super = ext4_write_super, 1218 .write_super = ext4_write_super,
1199 .put_super = ext4_put_super, 1219 .put_super = ext4_put_super,
@@ -1228,8 +1248,8 @@ enum {
1228 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, 1248 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1229 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, 1249 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
1230 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, 1250 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
1231 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1251 Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
1232 Opt_block_validity, Opt_noblock_validity, 1252 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1233 Opt_inode_readahead_blks, Opt_journal_ioprio, 1253 Opt_inode_readahead_blks, Opt_journal_ioprio,
1234 Opt_dioread_nolock, Opt_dioread_lock, 1254 Opt_dioread_nolock, Opt_dioread_lock,
1235 Opt_discard, Opt_nodiscard, 1255 Opt_discard, Opt_nodiscard,
@@ -1293,6 +1313,8 @@ static const match_table_t tokens = {
1293 {Opt_resize, "resize"}, 1313 {Opt_resize, "resize"},
1294 {Opt_delalloc, "delalloc"}, 1314 {Opt_delalloc, "delalloc"},
1295 {Opt_nodelalloc, "nodelalloc"}, 1315 {Opt_nodelalloc, "nodelalloc"},
1316 {Opt_mblk_io_submit, "mblk_io_submit"},
1317 {Opt_nomblk_io_submit, "nomblk_io_submit"},
1296 {Opt_block_validity, "block_validity"}, 1318 {Opt_block_validity, "block_validity"},
1297 {Opt_noblock_validity, "noblock_validity"}, 1319 {Opt_noblock_validity, "noblock_validity"},
1298 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, 1320 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1714,6 +1736,12 @@ set_qf_format:
1714 case Opt_nodelalloc: 1736 case Opt_nodelalloc:
1715 clear_opt(sbi->s_mount_opt, DELALLOC); 1737 clear_opt(sbi->s_mount_opt, DELALLOC);
1716 break; 1738 break;
1739 case Opt_mblk_io_submit:
1740 set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
1741 break;
1742 case Opt_nomblk_io_submit:
1743 clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
1744 break;
1717 case Opt_stripe: 1745 case Opt_stripe:
1718 if (match_int(&args[0], &option)) 1746 if (match_int(&args[0], &option))
1719 return 0; 1747 return 0;
@@ -2699,7 +2727,6 @@ static int ext4_lazyinit_thread(void *arg)
2699 struct ext4_li_request *elr; 2727 struct ext4_li_request *elr;
2700 unsigned long next_wakeup; 2728 unsigned long next_wakeup;
2701 DEFINE_WAIT(wait); 2729 DEFINE_WAIT(wait);
2702 int ret;
2703 2730
2704 BUG_ON(NULL == eli); 2731 BUG_ON(NULL == eli);
2705 2732
@@ -2723,13 +2750,12 @@ cont_thread:
2723 elr = list_entry(pos, struct ext4_li_request, 2750 elr = list_entry(pos, struct ext4_li_request,
2724 lr_request); 2751 lr_request);
2725 2752
2726 if (time_after_eq(jiffies, elr->lr_next_sched)) 2753 if (time_after_eq(jiffies, elr->lr_next_sched)) {
2727 ret = ext4_run_li_request(elr); 2754 if (ext4_run_li_request(elr) != 0) {
2728 2755 /* error, remove the lazy_init job */
2729 if (ret) { 2756 ext4_remove_li_request(elr);
2730 ret = 0; 2757 continue;
2731 ext4_remove_li_request(elr); 2758 }
2732 continue;
2733 } 2759 }
2734 2760
2735 if (time_before(elr->lr_next_sched, next_wakeup)) 2761 if (time_before(elr->lr_next_sched, next_wakeup))
@@ -2740,7 +2766,8 @@ cont_thread:
2740 if (freezing(current)) 2766 if (freezing(current))
2741 refrigerator(); 2767 refrigerator();
2742 2768
2743 if (time_after_eq(jiffies, next_wakeup)) { 2769 if ((time_after_eq(jiffies, next_wakeup)) ||
2770 (MAX_JIFFY_OFFSET == next_wakeup)) {
2744 cond_resched(); 2771 cond_resched();
2745 continue; 2772 continue;
2746 } 2773 }
@@ -2788,9 +2815,6 @@ static void ext4_clear_request_list(void)
2788 struct ext4_li_request *elr; 2815 struct ext4_li_request *elr;
2789 2816
2790 mutex_lock(&ext4_li_info->li_list_mtx); 2817 mutex_lock(&ext4_li_info->li_list_mtx);
2791 if (list_empty(&ext4_li_info->li_request_list))
2792 return;
2793
2794 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) { 2818 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
2795 elr = list_entry(pos, struct ext4_li_request, 2819 elr = list_entry(pos, struct ext4_li_request,
2796 lr_request); 2820 lr_request);
@@ -3257,13 +3281,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3257 * Test whether we have more sectors than will fit in sector_t, 3281 * Test whether we have more sectors than will fit in sector_t,
3258 * and whether the max offset is addressable by the page cache. 3282 * and whether the max offset is addressable by the page cache.
3259 */ 3283 */
3260 ret = generic_check_addressable(sb->s_blocksize_bits, 3284 err = generic_check_addressable(sb->s_blocksize_bits,
3261 ext4_blocks_count(es)); 3285 ext4_blocks_count(es));
3262 if (ret) { 3286 if (err) {
3263 ext4_msg(sb, KERN_ERR, "filesystem" 3287 ext4_msg(sb, KERN_ERR, "filesystem"
3264 " too large to mount safely on this system"); 3288 " too large to mount safely on this system");
3265 if (sizeof(sector_t) < 8) 3289 if (sizeof(sector_t) < 8)
3266 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); 3290 ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
3291 ret = err;
3267 goto failed_mount; 3292 goto failed_mount;
3268 } 3293 }
3269 3294
@@ -3348,6 +3373,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3348 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3373 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3349 spin_lock_init(&sbi->s_next_gen_lock); 3374 spin_lock_init(&sbi->s_next_gen_lock);
3350 3375
3376 err = percpu_counter_init(&sbi->s_freeblocks_counter,
3377 ext4_count_free_blocks(sb));
3378 if (!err) {
3379 err = percpu_counter_init(&sbi->s_freeinodes_counter,
3380 ext4_count_free_inodes(sb));
3381 }
3382 if (!err) {
3383 err = percpu_counter_init(&sbi->s_dirs_counter,
3384 ext4_count_dirs(sb));
3385 }
3386 if (!err) {
3387 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
3388 }
3389 if (err) {
3390 ext4_msg(sb, KERN_ERR, "insufficient memory");
3391 goto failed_mount3;
3392 }
3393
3351 sbi->s_stripe = ext4_get_stripe_size(sbi); 3394 sbi->s_stripe = ext4_get_stripe_size(sbi);
3352 sbi->s_max_writeback_mb_bump = 128; 3395 sbi->s_max_writeback_mb_bump = 128;
3353 3396
@@ -3446,22 +3489,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3446 } 3489 }
3447 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); 3490 set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3448 3491
3449no_journal: 3492 /*
3450 err = percpu_counter_init(&sbi->s_freeblocks_counter, 3493 * The journal may have updated the bg summary counts, so we
3451 ext4_count_free_blocks(sb)); 3494 * need to update the global counters.
3452 if (!err) 3495 */
3453 err = percpu_counter_init(&sbi->s_freeinodes_counter, 3496 percpu_counter_set(&sbi->s_freeblocks_counter,
3454 ext4_count_free_inodes(sb)); 3497 ext4_count_free_blocks(sb));
3455 if (!err) 3498 percpu_counter_set(&sbi->s_freeinodes_counter,
3456 err = percpu_counter_init(&sbi->s_dirs_counter, 3499 ext4_count_free_inodes(sb));
3457 ext4_count_dirs(sb)); 3500 percpu_counter_set(&sbi->s_dirs_counter,
3458 if (!err) 3501 ext4_count_dirs(sb));
3459 err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); 3502 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
3460 if (err) {
3461 ext4_msg(sb, KERN_ERR, "insufficient memory");
3462 goto failed_mount_wq;
3463 }
3464 3503
3504no_journal:
3465 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3505 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
3466 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3506 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3467 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3507 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
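Editor's note: once the journal has been replayed, the free-block and free-inode estimates taken from the group descriptors can be stale, so the hunk above refreshes the already-initialized counters with percpu_counter_set() instead of creating them here; the same init-before-journal ordering is what lets ext4_commit_super() drop its percpu_counter_initialized() guards later in this patch. An illustrative sketch of the set-then-publish pairing, with hypothetical names:

#include <linux/kernel.h>
#include <linux/percpu_counter.h>

/* Sketch only: overwrite a live counter (keeps its per-cpu storage),
 * then read it back the way the superblock-commit path does. */
static void example_refresh_and_publish(struct percpu_counter *c, s64 fresh,
					__le32 *on_disk)
{
	percpu_counter_set(c, fresh);
	*on_disk = cpu_to_le32(percpu_counter_sum_positive(c));
}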
@@ -3611,10 +3651,6 @@ failed_mount_wq:
3611 jbd2_journal_destroy(sbi->s_journal); 3651 jbd2_journal_destroy(sbi->s_journal);
3612 sbi->s_journal = NULL; 3652 sbi->s_journal = NULL;
3613 } 3653 }
3614 percpu_counter_destroy(&sbi->s_freeblocks_counter);
3615 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3616 percpu_counter_destroy(&sbi->s_dirs_counter);
3617 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
3618failed_mount3: 3654failed_mount3:
3619 if (sbi->s_flex_groups) { 3655 if (sbi->s_flex_groups) {
3620 if (is_vmalloc_addr(sbi->s_flex_groups)) 3656 if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3622,6 +3658,10 @@ failed_mount3:
3622 else 3658 else
3623 kfree(sbi->s_flex_groups); 3659 kfree(sbi->s_flex_groups);
3624 } 3660 }
3661 percpu_counter_destroy(&sbi->s_freeblocks_counter);
3662 percpu_counter_destroy(&sbi->s_freeinodes_counter);
3663 percpu_counter_destroy(&sbi->s_dirs_counter);
3664 percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
3625failed_mount2: 3665failed_mount2:
3626 for (i = 0; i < db_count; i++) 3666 for (i = 0; i < db_count; i++)
3627 brelse(sbi->s_group_desc[i]); 3667 brelse(sbi->s_group_desc[i]);
@@ -3949,13 +3989,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
3949 else 3989 else
3950 es->s_kbytes_written = 3990 es->s_kbytes_written =
3951 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 3991 cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
3952 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter)) 3992 ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
3953 ext4_free_blocks_count_set(es, percpu_counter_sum_positive( 3993 &EXT4_SB(sb)->s_freeblocks_counter));
3954 &EXT4_SB(sb)->s_freeblocks_counter)); 3994 es->s_free_inodes_count =
3955 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter)) 3995 cpu_to_le32(percpu_counter_sum_positive(
3956 es->s_free_inodes_count = 3996 &EXT4_SB(sb)->s_freeinodes_counter));
3957 cpu_to_le32(percpu_counter_sum_positive(
3958 &EXT4_SB(sb)->s_freeinodes_counter));
3959 sb->s_dirt = 0; 3997 sb->s_dirt = 0;
3960 BUFFER_TRACE(sbh, "marking dirty"); 3998 BUFFER_TRACE(sbh, "marking dirty");
3961 mark_buffer_dirty(sbh); 3999 mark_buffer_dirty(sbh);
@@ -4556,12 +4594,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4556 4594
4557static int ext4_quota_off(struct super_block *sb, int type) 4595static int ext4_quota_off(struct super_block *sb, int type)
4558{ 4596{
4559 /* Force all delayed allocation blocks to be allocated */ 4597 /* Force all delayed allocation blocks to be allocated.
4560 if (test_opt(sb, DELALLOC)) { 4598 * Caller already holds s_umount sem */
4561 down_read(&sb->s_umount); 4599 if (test_opt(sb, DELALLOC))
4562 sync_filesystem(sb); 4600 sync_filesystem(sb);
4563 up_read(&sb->s_umount);
4564 }
4565 4601
4566 return dquot_quota_off(sb, type); 4602 return dquot_quota_off(sb, type);
4567} 4603}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index ad6998a92c30..206351af7c58 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -514,11 +514,18 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
514 return &ei->vfs_inode; 514 return &ei->vfs_inode;
515} 515}
516 516
517static void fat_destroy_inode(struct inode *inode) 517static void fat_i_callback(struct rcu_head *head)
518{ 518{
519 struct inode *inode = container_of(head, struct inode, i_rcu);
520 INIT_LIST_HEAD(&inode->i_dentry);
519 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); 521 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
520} 522}
521 523
524static void fat_destroy_inode(struct inode *inode)
525{
526 call_rcu(&inode->i_rcu, fat_i_callback);
527}
528
522static void init_once(void *foo) 529static void init_once(void *foo)
523{ 530{
524 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; 531 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
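Editor's note: with RCU path walking, dentries and the inodes behind them may be inspected without references held, so filesystems defer freeing the inode until after an RCU grace period. The hunk above converts FAT to that pattern; the same conversion appears for freevxfs and fuse further down. A minimal sketch of the shape, with hypothetical "foo" names and a hypothetical FOO_I() container helper:

#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static struct kmem_cache *foo_inode_cachep;	/* assumed per-fs inode cache */

static void foo_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	/* i_rcu shares storage with i_dentry in this kernel, hence the re-init */
	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(foo_inode_cachep, FOO_I(inode));	/* hypothetical helper */
}

static void foo_destroy_inode(struct inode *inode)
{
	/* rcu-walk may still be looking at this inode; free after a grace period */
	call_rcu(&inode->i_rcu, foo_i_callback);
}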
@@ -743,7 +750,7 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
743 */ 750 */
744 result = d_obtain_alias(inode); 751 result = d_obtain_alias(inode);
745 if (!IS_ERR(result)) 752 if (!IS_ERR(result))
746 result->d_op = sb->s_root->d_op; 753 d_set_d_op(result, sb->s_root->d_op);
747 return result; 754 return result;
748} 755}
749 756
@@ -793,7 +800,7 @@ static struct dentry *fat_get_parent(struct dentry *child)
793 800
794 parent = d_obtain_alias(inode); 801 parent = d_obtain_alias(inode);
795 if (!IS_ERR(parent)) 802 if (!IS_ERR(parent))
796 parent->d_op = sb->s_root->d_op; 803 d_set_d_op(parent, sb->s_root->d_op);
797out: 804out:
798 unlock_super(sb); 805 unlock_super(sb);
799 806
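Editor's note: throughout this patch, direct dentry->d_op assignments become d_set_d_op() calls so the VFS can note in the dentry's flags which hooks are present and skip the indirect calls on the lockless lookup fast path. A hedged sketch of the export-path usage mirrored by the two FAT hunks above, with illustrative names:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/err.h>

static struct dentry *foo_export_alias(struct super_block *sb,
				       struct inode *inode)
{
	struct dentry *alias = d_obtain_alias(inode);

	if (!IS_ERR(alias))
		d_set_d_op(alias, sb->s_root->d_op);	/* not alias->d_op = ... */
	return alias;
}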
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3345aabd1dd7..35ffe43afa4b 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -148,7 +148,8 @@ static int msdos_find(struct inode *dir, const unsigned char *name, int len,
148 * that the existing dentry can be used. The msdos fs routines will 148 * that the existing dentry can be used. The msdos fs routines will
149 * return ENOENT or EINVAL as appropriate. 149 * return ENOENT or EINVAL as appropriate.
150 */ 150 */
151static int msdos_hash(struct dentry *dentry, struct qstr *qstr) 151static int msdos_hash(const struct dentry *dentry, const struct inode *inode,
152 struct qstr *qstr)
152{ 153{
153 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; 154 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
154 unsigned char msdos_name[MSDOS_NAME]; 155 unsigned char msdos_name[MSDOS_NAME];
@@ -164,16 +165,18 @@ static int msdos_hash(struct dentry *dentry, struct qstr *qstr)
164 * Compare two msdos names. If either of the names are invalid, 165 * Compare two msdos names. If either of the names are invalid,
165 * we fall back to doing the standard name comparison. 166 * we fall back to doing the standard name comparison.
166 */ 167 */
167static int msdos_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) 168static int msdos_cmp(const struct dentry *parent, const struct inode *pinode,
169 const struct dentry *dentry, const struct inode *inode,
170 unsigned int len, const char *str, const struct qstr *name)
168{ 171{
169 struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; 172 struct fat_mount_options *options = &MSDOS_SB(parent->d_sb)->options;
170 unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; 173 unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME];
171 int error; 174 int error;
172 175
173 error = msdos_format_name(a->name, a->len, a_msdos_name, options); 176 error = msdos_format_name(name->name, name->len, a_msdos_name, options);
174 if (error) 177 if (error)
175 goto old_compare; 178 goto old_compare;
176 error = msdos_format_name(b->name, b->len, b_msdos_name, options); 179 error = msdos_format_name(str, len, b_msdos_name, options);
177 if (error) 180 if (error)
178 goto old_compare; 181 goto old_compare;
179 error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); 182 error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME);
@@ -182,8 +185,8 @@ out:
182 185
183old_compare: 186old_compare:
184 error = 1; 187 error = 1;
185 if (a->len == b->len) 188 if (name->len == len)
186 error = memcmp(a->name, b->name, a->len); 189 error = memcmp(name->name, str, len);
187 goto out; 190 goto out;
188} 191}
189 192
@@ -224,10 +227,10 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
224 } 227 }
225out: 228out:
226 unlock_super(sb); 229 unlock_super(sb);
227 dentry->d_op = &msdos_dentry_operations; 230 d_set_d_op(dentry, &msdos_dentry_operations);
228 dentry = d_splice_alias(inode, dentry); 231 dentry = d_splice_alias(inode, dentry);
229 if (dentry) 232 if (dentry)
230 dentry->d_op = &msdos_dentry_operations; 233 d_set_d_op(dentry, &msdos_dentry_operations);
231 return dentry; 234 return dentry;
232 235
233error: 236error:
@@ -670,7 +673,7 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
670 } 673 }
671 674
672 sb->s_flags |= MS_NOATIME; 675 sb->s_flags |= MS_NOATIME;
673 sb->s_root->d_op = &msdos_dentry_operations; 676 d_set_d_op(sb->s_root, &msdos_dentry_operations);
674 unlock_super(sb); 677 unlock_super(sb);
675 return 0; 678 return 0;
676} 679}
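Editor's note: the msdos hunks above show the new shape of the name operations for the lockless dcache: d_hash() gains the inode and takes const arguments, while d_compare() now receives the parent, the dentry being compared, and the candidate name as a raw (len, str) pair that must only be read within len, since it may be handed in from an rcu-walk. A minimal sketch of a filesystem wiring these up, with illustrative names:

#include <linux/dcache.h>
#include <linux/string.h>

static int foo_d_hash(const struct dentry *dentry, const struct inode *inode,
		      struct qstr *qstr)
{
	qstr->hash = full_name_hash(qstr->name, qstr->len);
	return 0;
}

static int foo_d_compare(const struct dentry *parent, const struct inode *pinode,
			 const struct dentry *dentry, const struct inode *inode,
			 unsigned int len, const char *str, const struct qstr *name)
{
	if (len != name->len)
		return 1;
	return memcmp(str, name->name, len) ? 1 : 0;	/* 0 means "same name" */
}

static const struct dentry_operations foo_dentry_ops = {
	.d_hash		= foo_d_hash,
	.d_compare	= foo_d_compare,
};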
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index b936703b8924..e3ffc5e12332 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,6 +43,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU)
47 return -ECHILD;
48
46 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
47 if (dentry->d_inode) 50 if (dentry->d_inode)
48 return 1; 51 return 1;
@@ -51,6 +54,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
51 54
52static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
53{ 56{
57 if (nd->flags & LOOKUP_RCU)
58 return -ECHILD;
59
54 /* 60 /*
55 * This is not negative dentry. Always valid. 61 * This is not negative dentry. Always valid.
56 * 62 *
@@ -85,22 +91,26 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
85} 91}
86 92
87/* returns the length of a struct qstr, ignoring trailing dots */ 93/* returns the length of a struct qstr, ignoring trailing dots */
88static unsigned int vfat_striptail_len(struct qstr *qstr) 94static unsigned int __vfat_striptail_len(unsigned int len, const char *name)
89{ 95{
90 unsigned int len = qstr->len; 96 while (len && name[len - 1] == '.')
91
92 while (len && qstr->name[len - 1] == '.')
93 len--; 97 len--;
94 return len; 98 return len;
95} 99}
96 100
101static unsigned int vfat_striptail_len(const struct qstr *qstr)
102{
103 return __vfat_striptail_len(qstr->len, qstr->name);
104}
105
97/* 106/*
98 * Compute the hash for the vfat name corresponding to the dentry. 107 * Compute the hash for the vfat name corresponding to the dentry.
99 * Note: if the name is invalid, we leave the hash code unchanged so 108 * Note: if the name is invalid, we leave the hash code unchanged so
100 * that the existing dentry can be used. The vfat fs routines will 109 * that the existing dentry can be used. The vfat fs routines will
101 * return ENOENT or EINVAL as appropriate. 110 * return ENOENT or EINVAL as appropriate.
102 */ 111 */
103static int vfat_hash(struct dentry *dentry, struct qstr *qstr) 112static int vfat_hash(const struct dentry *dentry, const struct inode *inode,
113 struct qstr *qstr)
104{ 114{
105 qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr)); 115 qstr->hash = full_name_hash(qstr->name, vfat_striptail_len(qstr));
106 return 0; 116 return 0;
@@ -112,9 +122,10 @@ static int vfat_hash(struct dentry *dentry, struct qstr *qstr)
112 * that the existing dentry can be used. The vfat fs routines will 122 * that the existing dentry can be used. The vfat fs routines will
113 * return ENOENT or EINVAL as appropriate. 123 * return ENOENT or EINVAL as appropriate.
114 */ 124 */
115static int vfat_hashi(struct dentry *dentry, struct qstr *qstr) 125static int vfat_hashi(const struct dentry *dentry, const struct inode *inode,
126 struct qstr *qstr)
116{ 127{
117 struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; 128 struct nls_table *t = MSDOS_SB(dentry->d_sb)->nls_io;
118 const unsigned char *name; 129 const unsigned char *name;
119 unsigned int len; 130 unsigned int len;
120 unsigned long hash; 131 unsigned long hash;
@@ -133,16 +144,18 @@ static int vfat_hashi(struct dentry *dentry, struct qstr *qstr)
133/* 144/*
134 * Case insensitive compare of two vfat names. 145 * Case insensitive compare of two vfat names.
135 */ 146 */
136static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b) 147static int vfat_cmpi(const struct dentry *parent, const struct inode *pinode,
148 const struct dentry *dentry, const struct inode *inode,
149 unsigned int len, const char *str, const struct qstr *name)
137{ 150{
138 struct nls_table *t = MSDOS_SB(dentry->d_inode->i_sb)->nls_io; 151 struct nls_table *t = MSDOS_SB(parent->d_sb)->nls_io;
139 unsigned int alen, blen; 152 unsigned int alen, blen;
140 153
141 /* A filename cannot end in '.' or we treat it like it has none */ 154 /* A filename cannot end in '.' or we treat it like it has none */
142 alen = vfat_striptail_len(a); 155 alen = vfat_striptail_len(name);
143 blen = vfat_striptail_len(b); 156 blen = __vfat_striptail_len(len, str);
144 if (alen == blen) { 157 if (alen == blen) {
145 if (nls_strnicmp(t, a->name, b->name, alen) == 0) 158 if (nls_strnicmp(t, name->name, str, alen) == 0)
146 return 0; 159 return 0;
147 } 160 }
148 return 1; 161 return 1;
@@ -151,15 +164,17 @@ static int vfat_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b)
151/* 164/*
152 * Case sensitive compare of two vfat names. 165 * Case sensitive compare of two vfat names.
153 */ 166 */
154static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) 167static int vfat_cmp(const struct dentry *parent, const struct inode *pinode,
168 const struct dentry *dentry, const struct inode *inode,
169 unsigned int len, const char *str, const struct qstr *name)
155{ 170{
156 unsigned int alen, blen; 171 unsigned int alen, blen;
157 172
158 /* A filename cannot end in '.' or we treat it like it has none */ 173 /* A filename cannot end in '.' or we treat it like it has none */
159 alen = vfat_striptail_len(a); 174 alen = vfat_striptail_len(name);
160 blen = vfat_striptail_len(b); 175 blen = __vfat_striptail_len(len, str);
161 if (alen == blen) { 176 if (alen == blen) {
162 if (strncmp(a->name, b->name, alen) == 0) 177 if (strncmp(name->name, str, alen) == 0)
163 return 0; 178 return 0;
164 } 179 }
165 return 1; 180 return 1;
@@ -757,11 +772,11 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
757 772
758out: 773out:
759 unlock_super(sb); 774 unlock_super(sb);
760 dentry->d_op = sb->s_root->d_op; 775 d_set_d_op(dentry, sb->s_root->d_op);
761 dentry->d_time = dentry->d_parent->d_inode->i_version; 776 dentry->d_time = dentry->d_parent->d_inode->i_version;
762 dentry = d_splice_alias(inode, dentry); 777 dentry = d_splice_alias(inode, dentry);
763 if (dentry) { 778 if (dentry) {
764 dentry->d_op = sb->s_root->d_op; 779 d_set_d_op(dentry, sb->s_root->d_op);
765 dentry->d_time = dentry->d_parent->d_inode->i_version; 780 dentry->d_time = dentry->d_parent->d_inode->i_version;
766 } 781 }
767 return dentry; 782 return dentry;
@@ -1063,9 +1078,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
1063 } 1078 }
1064 1079
1065 if (MSDOS_SB(sb)->options.name_check != 's') 1080 if (MSDOS_SB(sb)->options.name_check != 's')
1066 sb->s_root->d_op = &vfat_ci_dentry_ops; 1081 d_set_d_op(sb->s_root, &vfat_ci_dentry_ops);
1067 else 1082 else
1068 sb->s_root->d_op = &vfat_dentry_ops; 1083 d_set_d_op(sb->s_root, &vfat_dentry_ops);
1069 1084
1070 unlock_super(sb); 1085 unlock_super(sb);
1071 return 0; 1086 return 0;
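Editor's note: vfat gains two rcu-walk adjustments above: d_revalidate backs out with -ECHILD when called with LOOKUP_RCU so the VFS retries in ref-walk mode, and the trailing-dot stripping is split into __vfat_striptail_len() so d_compare can work directly on the raw (len, str) pair it is now handed. A sketch of the bail-out pattern, which recurs in the fuse and gfs2 hunks below; names are illustrative:

#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/errno.h>

static int foo_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
	if (nd->flags & LOOKUP_RCU)
		return -ECHILD;		/* cannot sleep or take refs here; retry in ref-walk */

	/* ... normal, possibly blocking, revalidation work ... */
	return 1;			/* dentry is still valid */
}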
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 68ba492d8eef..751d6b255a12 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -115,6 +115,9 @@ int unregister_filesystem(struct file_system_type * fs)
115 tmp = &(*tmp)->next; 115 tmp = &(*tmp)->next;
116 } 116 }
117 write_unlock(&file_systems_lock); 117 write_unlock(&file_systems_lock);
118
119 synchronize_rcu();
120
118 return -EINVAL; 121 return -EINVAL;
119} 122}
120 123
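Editor's note: unregister_filesystem() now waits out an RCU grace period after unlinking the entry; the intent, as far as this hunk shows, is that lockless path walkers which might still be executing the type's dentry or inode code have all finished before a module-unload caller proceeds to free it. A generic sketch of the unpublish-then-wait pattern; the list handling is hypothetical and only synchronize_rcu() is the point:

#include <linux/fs.h>
#include <linux/rcupdate.h>

static void example_unpublish(struct file_system_type *fs)
{
	/* ... unlink 'fs' from the shared list under the usual lock ... */
	synchronize_rcu();	/* every pre-existing RCU reader has now finished */
	/* nothing running a lockless walk can still be inside the module's code */
}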
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 8c04eac5079d..2ba6719ac612 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -337,6 +337,13 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
337 return ip; 337 return ip;
338} 338}
339 339
340static void vxfs_i_callback(struct rcu_head *head)
341{
342 struct inode *inode = container_of(head, struct inode, i_rcu);
343 INIT_LIST_HEAD(&inode->i_dentry);
344 kmem_cache_free(vxfs_inode_cachep, inode->i_private);
345}
346
340/** 347/**
341 * vxfs_evict_inode - remove inode from main memory 348 * vxfs_evict_inode - remove inode from main memory
342 * @ip: inode to discard. 349 * @ip: inode to discard.
@@ -350,5 +357,5 @@ vxfs_evict_inode(struct inode *ip)
350{ 357{
351 truncate_inode_pages(&ip->i_data, 0); 358 truncate_inode_pages(&ip->i_data, 0);
352 end_writeback(ip); 359 end_writeback(ip);
353 kmem_cache_free(vxfs_inode_cachep, ip->i_private); 360 call_rcu(&ip->i_rcu, vxfs_i_callback);
354} 361}
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index ed45a9cf5f3d..68ca487bedb1 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -14,12 +14,14 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
14 struct path old_root; 14 struct path old_root;
15 15
16 spin_lock(&fs->lock); 16 spin_lock(&fs->lock);
17 write_seqcount_begin(&fs->seq);
17 old_root = fs->root; 18 old_root = fs->root;
18 fs->root = *path; 19 fs->root = *path;
19 path_get(path); 20 path_get_long(path);
21 write_seqcount_end(&fs->seq);
20 spin_unlock(&fs->lock); 22 spin_unlock(&fs->lock);
21 if (old_root.dentry) 23 if (old_root.dentry)
22 path_put(&old_root); 24 path_put_long(&old_root);
23} 25}
24 26
25/* 27/*
@@ -31,13 +33,15 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
31 struct path old_pwd; 33 struct path old_pwd;
32 34
33 spin_lock(&fs->lock); 35 spin_lock(&fs->lock);
36 write_seqcount_begin(&fs->seq);
34 old_pwd = fs->pwd; 37 old_pwd = fs->pwd;
35 fs->pwd = *path; 38 fs->pwd = *path;
36 path_get(path); 39 path_get_long(path);
40 write_seqcount_end(&fs->seq);
37 spin_unlock(&fs->lock); 41 spin_unlock(&fs->lock);
38 42
39 if (old_pwd.dentry) 43 if (old_pwd.dentry)
40 path_put(&old_pwd); 44 path_put_long(&old_pwd);
41} 45}
42 46
43void chroot_fs_refs(struct path *old_root, struct path *new_root) 47void chroot_fs_refs(struct path *old_root, struct path *new_root)
@@ -52,31 +56,33 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
52 fs = p->fs; 56 fs = p->fs;
53 if (fs) { 57 if (fs) {
54 spin_lock(&fs->lock); 58 spin_lock(&fs->lock);
59 write_seqcount_begin(&fs->seq);
55 if (fs->root.dentry == old_root->dentry 60 if (fs->root.dentry == old_root->dentry
56 && fs->root.mnt == old_root->mnt) { 61 && fs->root.mnt == old_root->mnt) {
57 path_get(new_root); 62 path_get_long(new_root);
58 fs->root = *new_root; 63 fs->root = *new_root;
59 count++; 64 count++;
60 } 65 }
61 if (fs->pwd.dentry == old_root->dentry 66 if (fs->pwd.dentry == old_root->dentry
62 && fs->pwd.mnt == old_root->mnt) { 67 && fs->pwd.mnt == old_root->mnt) {
63 path_get(new_root); 68 path_get_long(new_root);
64 fs->pwd = *new_root; 69 fs->pwd = *new_root;
65 count++; 70 count++;
66 } 71 }
72 write_seqcount_end(&fs->seq);
67 spin_unlock(&fs->lock); 73 spin_unlock(&fs->lock);
68 } 74 }
69 task_unlock(p); 75 task_unlock(p);
70 } while_each_thread(g, p); 76 } while_each_thread(g, p);
71 read_unlock(&tasklist_lock); 77 read_unlock(&tasklist_lock);
72 while (count--) 78 while (count--)
73 path_put(old_root); 79 path_put_long(old_root);
74} 80}
75 81
76void free_fs_struct(struct fs_struct *fs) 82void free_fs_struct(struct fs_struct *fs)
77{ 83{
78 path_put(&fs->root); 84 path_put_long(&fs->root);
79 path_put(&fs->pwd); 85 path_put_long(&fs->pwd);
80 kmem_cache_free(fs_cachep, fs); 86 kmem_cache_free(fs_cachep, fs);
81} 87}
82 88
@@ -88,8 +94,10 @@ void exit_fs(struct task_struct *tsk)
88 int kill; 94 int kill;
89 task_lock(tsk); 95 task_lock(tsk);
90 spin_lock(&fs->lock); 96 spin_lock(&fs->lock);
97 write_seqcount_begin(&fs->seq);
91 tsk->fs = NULL; 98 tsk->fs = NULL;
92 kill = !--fs->users; 99 kill = !--fs->users;
100 write_seqcount_end(&fs->seq);
93 spin_unlock(&fs->lock); 101 spin_unlock(&fs->lock);
94 task_unlock(tsk); 102 task_unlock(tsk);
95 if (kill) 103 if (kill)
@@ -105,8 +113,15 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
105 fs->users = 1; 113 fs->users = 1;
106 fs->in_exec = 0; 114 fs->in_exec = 0;
107 spin_lock_init(&fs->lock); 115 spin_lock_init(&fs->lock);
116 seqcount_init(&fs->seq);
108 fs->umask = old->umask; 117 fs->umask = old->umask;
109 get_fs_root_and_pwd(old, &fs->root, &fs->pwd); 118
119 spin_lock(&old->lock);
120 fs->root = old->root;
121 path_get_long(&fs->root);
122 fs->pwd = old->pwd;
123 path_get_long(&fs->pwd);
124 spin_unlock(&old->lock);
110 } 125 }
111 return fs; 126 return fs;
112} 127}
@@ -144,6 +159,7 @@ EXPORT_SYMBOL(current_umask);
144struct fs_struct init_fs = { 159struct fs_struct init_fs = {
145 .users = 1, 160 .users = 1,
146 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), 161 .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
162 .seq = SEQCNT_ZERO,
147 .umask = 0022, 163 .umask = 0022,
148}; 164};
149 165
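Editor's note: fs_struct grows a seqcount next to its spinlock; writers hold fs->lock and bracket root/pwd updates with write_seqcount_begin/end, so an rcu-walk reader can snapshot root and pwd with only a retry loop and no reference counts (the real consumers live in fs/namei.c's rcu-walk setup). A minimal reader sketch under that assumption, using the seq field added by this patch:

#include <linux/fs_struct.h>
#include <linux/seqlock.h>
#include <linux/path.h>

/* Lockless snapshot of a task's root; valid only if the seq check passes,
 * and the caller must still pin the path before using it outside RCU. */
static struct path example_get_root(struct fs_struct *fs)
{
	struct path root;
	unsigned seq;

	do {
		seq = read_seqcount_begin(&fs->seq);
		root = fs->root;		/* no reference taken */
	} while (read_seqcount_retry(&fs->seq, seq));

	return root;
}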
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c9627c95482d..f738599fd8cd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -156,8 +156,12 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
156 */ 156 */
157static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) 157static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode = entry->d_inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU)
162 return -ECHILD;
163
164 inode = entry->d_inode;
161 if (inode && is_bad_inode(inode)) 165 if (inode && is_bad_inode(inode))
162 return 0; 166 return 0;
163 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 167 else if (fuse_dentry_time(entry) < get_jiffies_64()) {
@@ -347,7 +351,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
347 } 351 }
348 352
349 entry = newent ? newent : entry; 353 entry = newent ? newent : entry;
350 entry->d_op = &fuse_dentry_operations; 354 d_set_d_op(entry, &fuse_dentry_operations);
351 if (outarg_valid) 355 if (outarg_valid)
352 fuse_change_entry_timeout(entry, &outarg); 356 fuse_change_entry_timeout(entry, &outarg);
353 else 357 else
@@ -981,12 +985,15 @@ static int fuse_access(struct inode *inode, int mask)
981 * access request is sent. Execute permission is still checked 985 * access request is sent. Execute permission is still checked
982 * locally based on file mode. 986 * locally based on file mode.
983 */ 987 */
984static int fuse_permission(struct inode *inode, int mask) 988static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
985{ 989{
986 struct fuse_conn *fc = get_fuse_conn(inode); 990 struct fuse_conn *fc = get_fuse_conn(inode);
987 bool refreshed = false; 991 bool refreshed = false;
988 int err = 0; 992 int err = 0;
989 993
994 if (flags & IPERM_FLAG_RCU)
995 return -ECHILD;
996
990 if (!fuse_allow_task(fc, current)) 997 if (!fuse_allow_task(fc, current))
991 return -EACCES; 998 return -EACCES;
992 999
@@ -1001,7 +1008,7 @@ static int fuse_permission(struct inode *inode, int mask)
1001 } 1008 }
1002 1009
1003 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1010 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1004 err = generic_permission(inode, mask, NULL); 1011 err = generic_permission(inode, mask, flags, NULL);
1005 1012
1006 /* If permission is denied, try to refresh file 1013 /* If permission is denied, try to refresh file
1007 attributes. This is also needed, because the root 1014 attributes. This is also needed, because the root
@@ -1009,7 +1016,8 @@ static int fuse_permission(struct inode *inode, int mask)
1009 if (err == -EACCES && !refreshed) { 1016 if (err == -EACCES && !refreshed) {
1010 err = fuse_do_getattr(inode, NULL, NULL); 1017 err = fuse_do_getattr(inode, NULL, NULL);
1011 if (!err) 1018 if (!err)
1012 err = generic_permission(inode, mask, NULL); 1019 err = generic_permission(inode, mask,
1020 flags, NULL);
1013 } 1021 }
1014 1022
1015 /* Note: the opposite of the above test does not 1023 /* Note: the opposite of the above test does not
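Editor's note: the fuse/dir.c hunks above show the new calling conventions end to end: d_revalidate refuses rcu-walk with -ECHILD, and ->permission() now takes a flags word, returning -ECHILD under IPERM_FLAG_RCU because fuse may need to talk to userspace; generic_permission() gains the same flags argument so its ACL callback can make the same call. A sketch of a minimal ->permission() for the new API, with an illustrative name:

#include <linux/fs.h>
#include <linux/errno.h>

static int foo_permission(struct inode *inode, int mask, unsigned int flags)
{
	if (flags & IPERM_FLAG_RCU)
		return -ECHILD;		/* rcu-walk: no blocking allowed, retry in ref-walk */

	/* generic mode-bit check; NULL means "no ACL callback" */
	return generic_permission(inode, mask, flags, NULL);
}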
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c8224587123f..8b984a2cebbd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -13,6 +13,7 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/compat.h>
16 17
17static const struct file_operations fuse_direct_io_file_operations; 18static const struct file_operations fuse_direct_io_file_operations;
18 19
@@ -134,6 +135,7 @@ EXPORT_SYMBOL_GPL(fuse_do_open);
134void fuse_finish_open(struct inode *inode, struct file *file) 135void fuse_finish_open(struct inode *inode, struct file *file)
135{ 136{
136 struct fuse_file *ff = file->private_data; 137 struct fuse_file *ff = file->private_data;
138 struct fuse_conn *fc = get_fuse_conn(inode);
137 139
138 if (ff->open_flags & FOPEN_DIRECT_IO) 140 if (ff->open_flags & FOPEN_DIRECT_IO)
139 file->f_op = &fuse_direct_io_file_operations; 141 file->f_op = &fuse_direct_io_file_operations;
@@ -141,6 +143,15 @@ void fuse_finish_open(struct inode *inode, struct file *file)
141 invalidate_inode_pages2(inode->i_mapping); 143 invalidate_inode_pages2(inode->i_mapping);
142 if (ff->open_flags & FOPEN_NONSEEKABLE) 144 if (ff->open_flags & FOPEN_NONSEEKABLE)
143 nonseekable_open(inode, file); 145 nonseekable_open(inode, file);
146 if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
147 struct fuse_inode *fi = get_fuse_inode(inode);
148
149 spin_lock(&fc->lock);
150 fi->attr_version = ++fc->attr_version;
151 i_size_write(inode, 0);
152 spin_unlock(&fc->lock);
153 fuse_invalidate_attr(inode);
154 }
144} 155}
145 156
146int fuse_open_common(struct inode *inode, struct file *file, bool isdir) 157int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
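Editor's note: when the server advertises atomic_o_trunc, the open itself truncates the file server-side, so fuse_finish_open() now mirrors that locally, bumping the attribute version and zeroing i_size under fc->lock before invalidating cached attributes. A small sketch of the i_size_write()/i_size_read() pairing this relies on; the lock parameter stands in for fc->lock and the names are illustrative:

#include <linux/fs.h>
#include <linux/spinlock.h>

/* Writer side: i_size_write() must be serialized against other writers. */
static void example_truncate_size(struct inode *inode, spinlock_t *lock)
{
	spin_lock(lock);
	i_size_write(inode, 0);
	spin_unlock(lock);
}

/* Reader side: lockless, never sees a torn 64-bit size. */
static loff_t example_read_size(struct inode *inode)
{
	return i_size_read(inode);
}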
@@ -1618,6 +1629,58 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
1618} 1629}
1619 1630
1620/* 1631/*
1632 * CUSE servers compiled on 32bit broke on 64bit kernels because the
1633 * ABI was defined to be 'struct iovec' which is different on 32bit
1634 * and 64bit. Fortunately we can determine which structure the server
1635 * used from the size of the reply.
1636 */
1637static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src,
1638 size_t transferred, unsigned count,
1639 bool is_compat)
1640{
1641#ifdef CONFIG_COMPAT
1642 if (count * sizeof(struct compat_iovec) == transferred) {
1643 struct compat_iovec *ciov = src;
1644 unsigned i;
1645
1646 /*
1647 * With this interface a 32bit server cannot support
1648 * non-compat (i.e. ones coming from 64bit apps) ioctl
1649 * requests
1650 */
1651 if (!is_compat)
1652 return -EINVAL;
1653
1654 for (i = 0; i < count; i++) {
1655 dst[i].iov_base = compat_ptr(ciov[i].iov_base);
1656 dst[i].iov_len = ciov[i].iov_len;
1657 }
1658 return 0;
1659 }
1660#endif
1661
1662 if (count * sizeof(struct iovec) != transferred)
1663 return -EIO;
1664
1665 memcpy(dst, src, transferred);
1666 return 0;
1667}
1668
1669/* Make sure iov_length() won't overflow */
1670static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
1671{
1672 size_t n;
1673 u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
1674
1675 for (n = 0; n < count; n++) {
1676 if (iov->iov_len > (size_t) max)
1677 return -ENOMEM;
1678 max -= iov->iov_len;
1679 }
1680 return 0;
1681}
1682
1683/*
1621 * For ioctls, there is no generic way to determine how much memory 1684 * For ioctls, there is no generic way to determine how much memory
1622 * needs to be read and/or written. Furthermore, ioctls are allowed 1685 * needs to be read and/or written. Furthermore, ioctls are allowed
1623 * to dereference the passed pointer, so the parameter requires deep 1686 * to dereference the passed pointer, so the parameter requires deep
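Editor's note: the helpers added above disambiguate the iovec layout purely by size: a 32-bit CUSE/FUSE server replies with compat_iovec entries, a native one with struct iovec, and count times the size of the right type must equal the bytes transferred; fuse_verify_ioctl_iov() then bounds the total length to FUSE_MAX_PAGES_PER_REQ pages so iov_length() cannot overflow. A hedged sketch of the size-discrimination idea only, with an illustrative name:

#include <linux/types.h>
#include <linux/compat.h>
#include <linux/uio.h>

/* Sketch: decide which ABI the server used from the reply size alone. */
static bool reply_is_compat_iovec(size_t transferred, unsigned count)
{
#ifdef CONFIG_COMPAT
	if (count * sizeof(struct compat_iovec) == transferred)
		return true;	/* 32-bit server: smaller, 32-bit-field entries */
#endif
	return false;		/* otherwise expect count * sizeof(struct iovec) */
}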
@@ -1798,18 +1861,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
1798 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 1861 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
1799 goto out; 1862 goto out;
1800 1863
1801 err = -EIO;
1802 if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
1803 goto out;
1804
1805 /* okay, copy in iovs and retry */
1806 vaddr = kmap_atomic(pages[0], KM_USER0); 1864 vaddr = kmap_atomic(pages[0], KM_USER0);
1807 memcpy(page_address(iov_page), vaddr, transferred); 1865 err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr,
1866 transferred, in_iovs + out_iovs,
1867 (flags & FUSE_IOCTL_COMPAT) != 0);
1808 kunmap_atomic(vaddr, KM_USER0); 1868 kunmap_atomic(vaddr, KM_USER0);
1869 if (err)
1870 goto out;
1809 1871
1810 in_iov = page_address(iov_page); 1872 in_iov = page_address(iov_page);
1811 out_iov = in_iov + in_iovs; 1873 out_iov = in_iov + in_iovs;
1812 1874
1875 err = fuse_verify_ioctl_iov(in_iov, in_iovs);
1876 if (err)
1877 goto out;
1878
1879 err = fuse_verify_ioctl_iov(out_iov, out_iovs);
1880 if (err)
1881 goto out;
1882
1813 goto retry; 1883 goto retry;
1814 } 1884 }
1815 1885
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index cfce3ad86a92..a8b31da19b93 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -99,6 +99,13 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
99 return inode; 99 return inode;
100} 100}
101 101
102static void fuse_i_callback(struct rcu_head *head)
103{
104 struct inode *inode = container_of(head, struct inode, i_rcu);
105 INIT_LIST_HEAD(&inode->i_dentry);
106 kmem_cache_free(fuse_inode_cachep, inode);
107}
108
102static void fuse_destroy_inode(struct inode *inode) 109static void fuse_destroy_inode(struct inode *inode)
103{ 110{
104 struct fuse_inode *fi = get_fuse_inode(inode); 111 struct fuse_inode *fi = get_fuse_inode(inode);
@@ -106,7 +113,7 @@ static void fuse_destroy_inode(struct inode *inode)
106 BUG_ON(!list_empty(&fi->queued_writes)); 113 BUG_ON(!list_empty(&fi->queued_writes));
107 if (fi->forget_req) 114 if (fi->forget_req)
108 fuse_request_free(fi->forget_req); 115 fuse_request_free(fi->forget_req);
109 kmem_cache_free(fuse_inode_cachep, inode); 116 call_rcu(&inode->i_rcu, fuse_i_callback);
110} 117}
111 118
112void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req, 119void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
@@ -619,7 +626,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
619 626
620 entry = d_obtain_alias(inode); 627 entry = d_obtain_alias(inode);
621 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { 628 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
622 entry->d_op = &fuse_dentry_operations; 629 d_set_d_op(entry, &fuse_dentry_operations);
623 fuse_invalidate_entry_cache(entry); 630 fuse_invalidate_entry_cache(entry);
624 } 631 }
625 632
@@ -721,7 +728,7 @@ static struct dentry *fuse_get_parent(struct dentry *child)
721 728
722 parent = d_obtain_alias(inode); 729 parent = d_obtain_alias(inode);
723 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { 730 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
724 parent->d_op = &fuse_dentry_operations; 731 d_set_d_op(parent, &fuse_dentry_operations);
725 fuse_invalidate_entry_cache(parent); 732 fuse_invalidate_entry_cache(parent);
726 } 733 }
727 734
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 6bc9e3a5a693..06c48a891832 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,14 +190,20 @@ generic_acl_chmod(struct inode *inode)
190} 190}
191 191
192int 192int
193generic_check_acl(struct inode *inode, int mask) 193generic_check_acl(struct inode *inode, int mask, unsigned int flags)
194{ 194{
195 struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS); 195 if (flags & IPERM_FLAG_RCU) {
196 196 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
197 if (acl) { 197 return -ECHILD;
198 int error = posix_acl_permission(inode, acl, mask); 198 } else {
199 posix_acl_release(acl); 199 struct posix_acl *acl;
200 return error; 200
201 acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
202 if (acl) {
203 int error = posix_acl_permission(inode, acl, mask);
204 posix_acl_release(acl);
205 return error;
206 }
201 } 207 }
202 return -EAGAIN; 208 return -EAGAIN;
203} 209}
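Editor's note: in rcu-walk mode the ACL cache may only be consulted, never filled or released with sleeping calls, so the patched generic_check_acl() proceeds only when the cache already records "no ACL" and otherwise returns -ECHILD to force a ref-walk retry. The hunk above, restated as a sketch with comments, using the same functions it calls:

static int example_check_acl(struct inode *inode, int mask, unsigned int flags)
{
	if (flags & IPERM_FLAG_RCU) {
		/* rcu-walk: only a cached "no ACL at all" lets us continue */
		if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
			return -ECHILD;	/* unknown or present: drop to ref-walk */
	} else {
		struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);

		if (acl) {
			int error = posix_acl_permission(inode, acl, mask);
			posix_acl_release(acl);
			return error;
		}
	}
	return -EAGAIN;		/* no ACL: fall back to ordinary mode bits */
}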
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 48171f4c943d..7118f1a780a9 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,11 +75,14 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
75 * Returns: errno 75 * Returns: errno
76 */ 76 */
77 77
78int gfs2_check_acl(struct inode *inode, int mask) 78int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
79{ 79{
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU)
84 return -ECHILD;
85
83 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); 86 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
84 if (IS_ERR(acl)) 87 if (IS_ERR(acl))
85 return PTR_ERR(acl); 88 return PTR_ERR(acl);
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index b522b0cb39ea..a93907c8159b 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" 16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
17#define GFS2_ACL_MAX_ENTRIES 25 17#define GFS2_ACL_MAX_ENTRIES 25
18 18
19extern int gfs2_check_acl(struct inode *inode, int mask); 19extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int);
20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
22extern const struct xattr_handler gfs2_xattr_system_handler; 22extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 5476c066d4ee..3c4039d5eef1 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -763,7 +763,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
763 int metadata; 763 int metadata;
764 unsigned int revokes = 0; 764 unsigned int revokes = 0;
765 int x; 765 int x;
766 int error; 766 int error = 0;
767 767
768 if (!*top) 768 if (!*top)
769 sm->sm_first = 0; 769 sm->sm_first = 0;
@@ -780,7 +780,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
780 if (metadata) 780 if (metadata)
781 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; 781 revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
782 782
783 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); 783 if (ip != GFS2_I(sdp->sd_rindex))
784 error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
785 else if (!sdp->sd_rgrps)
786 error = gfs2_ri_update(ip);
787
784 if (error) 788 if (error)
785 return error; 789 return error;
786 790
@@ -879,7 +883,8 @@ out_rg_gunlock:
879out_rlist: 883out_rlist:
880 gfs2_rlist_free(&rlist); 884 gfs2_rlist_free(&rlist);
881out: 885out:
882 gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); 886 if (ip != GFS2_I(sdp->sd_rindex))
887 gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
883 return error; 888 return error;
884} 889}
885 890
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6798755b3858..4a456338b873 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -11,6 +11,7 @@
11#include <linux/completion.h> 11#include <linux/completion.h>
12#include <linux/buffer_head.h> 12#include <linux/buffer_head.h>
13#include <linux/gfs2_ondisk.h> 13#include <linux/gfs2_ondisk.h>
14#include <linux/namei.h>
14#include <linux/crc32.h> 15#include <linux/crc32.h>
15 16
16#include "gfs2.h" 17#include "gfs2.h"
@@ -34,15 +35,23 @@
34 35
35static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) 36static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
36{ 37{
37 struct dentry *parent = dget_parent(dentry); 38 struct dentry *parent;
38 struct gfs2_sbd *sdp = GFS2_SB(parent->d_inode); 39 struct gfs2_sbd *sdp;
39 struct gfs2_inode *dip = GFS2_I(parent->d_inode); 40 struct gfs2_inode *dip;
40 struct inode *inode = dentry->d_inode; 41 struct inode *inode;
41 struct gfs2_holder d_gh; 42 struct gfs2_holder d_gh;
42 struct gfs2_inode *ip = NULL; 43 struct gfs2_inode *ip = NULL;
43 int error; 44 int error;
44 int had_lock = 0; 45 int had_lock = 0;
45 46
47 if (nd->flags & LOOKUP_RCU)
48 return -ECHILD;
49
50 parent = dget_parent(dentry);
51 sdp = GFS2_SB(parent->d_inode);
52 dip = GFS2_I(parent->d_inode);
53 inode = dentry->d_inode;
54
46 if (inode) { 55 if (inode) {
47 if (is_bad_inode(inode)) 56 if (is_bad_inode(inode))
48 goto invalid; 57 goto invalid;
@@ -100,13 +109,14 @@ fail:
100 return 0; 109 return 0;
101} 110}
102 111
103static int gfs2_dhash(struct dentry *dentry, struct qstr *str) 112static int gfs2_dhash(const struct dentry *dentry, const struct inode *inode,
113 struct qstr *str)
104{ 114{
105 str->hash = gfs2_disk_hash(str->name, str->len); 115 str->hash = gfs2_disk_hash(str->name, str->len);
106 return 0; 116 return 0;
107} 117}
108 118
109static int gfs2_dentry_delete(struct dentry *dentry) 119static int gfs2_dentry_delete(const struct dentry *dentry)
110{ 120{
111 struct gfs2_inode *ginode; 121 struct gfs2_inode *ginode;
112 122
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 06d582732d34..97012ecff560 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -130,7 +130,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
130 130
131 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); 131 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
132 if (!IS_ERR(dentry)) 132 if (!IS_ERR(dentry))
133 dentry->d_op = &gfs2_dops; 133 d_set_d_op(dentry, &gfs2_dops);
134 return dentry; 134 return dentry;
135} 135}
136 136
@@ -138,10 +138,8 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
138 struct gfs2_inum_host *inum) 138 struct gfs2_inum_host *inum)
139{ 139{
140 struct gfs2_sbd *sdp = sb->s_fs_info; 140 struct gfs2_sbd *sdp = sb->s_fs_info;
141 struct gfs2_holder i_gh;
142 struct inode *inode; 141 struct inode *inode;
143 struct dentry *dentry; 142 struct dentry *dentry;
144 int error;
145 143
146 inode = gfs2_ilookup(sb, inum->no_addr); 144 inode = gfs2_ilookup(sb, inum->no_addr);
147 if (inode) { 145 if (inode) {
@@ -152,52 +150,16 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
152 goto out_inode; 150 goto out_inode;
153 } 151 }
154 152
155 error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops, 153 inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
156 LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 154 GFS2_BLKST_DINODE);
157 if (error) 155 if (IS_ERR(inode))
158 return ERR_PTR(error); 156 return ERR_CAST(inode);
159
160 error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
161 if (error)
162 goto fail;
163
164 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0);
165 if (IS_ERR(inode)) {
166 error = PTR_ERR(inode);
167 goto fail;
168 }
169
170 error = gfs2_inode_refresh(GFS2_I(inode));
171 if (error) {
172 iput(inode);
173 goto fail;
174 }
175
176 /* Pick up the works we bypass in gfs2_inode_lookup */
177 if (inode->i_state & I_NEW)
178 gfs2_set_iop(inode);
179
180 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
181 iput(inode);
182 goto fail;
183 }
184
185 error = -EIO;
186 if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
187 iput(inode);
188 goto fail;
189 }
190
191 gfs2_glock_dq_uninit(&i_gh);
192 157
193out_inode: 158out_inode:
194 dentry = d_obtain_alias(inode); 159 dentry = d_obtain_alias(inode);
195 if (!IS_ERR(dentry)) 160 if (!IS_ERR(dentry))
196 dentry->d_op = &gfs2_dops; 161 d_set_d_op(dentry, &gfs2_dops);
197 return dentry; 162 return dentry;
198fail:
199 gfs2_glock_dq_uninit(&i_gh);
200 return ERR_PTR(error);
201} 163}
202 164
203static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid, 165static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index aa996471ec5c..fca6689e12e6 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -241,7 +241,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
241 !capable(CAP_LINUX_IMMUTABLE)) 241 !capable(CAP_LINUX_IMMUTABLE))
242 goto out; 242 goto out;
243 if (!IS_IMMUTABLE(inode)) { 243 if (!IS_IMMUTABLE(inode)) {
244 error = gfs2_permission(inode, MAY_WRITE); 244 error = gfs2_permission(inode, MAY_WRITE, 0);
245 if (error) 245 if (error)
246 goto out; 246 goto out;
247 } 247 }
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 87778857f099..08a8beb152e6 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -541,21 +541,6 @@ out_locked:
541 spin_unlock(&gl->gl_spin); 541 spin_unlock(&gl->gl_spin);
542} 542}
543 543
544static unsigned int gfs2_lm_lock(struct gfs2_sbd *sdp, void *lock,
545 unsigned int req_state,
546 unsigned int flags)
547{
548 int ret = LM_OUT_ERROR;
549
550 if (!sdp->sd_lockstruct.ls_ops->lm_lock)
551 return req_state == LM_ST_UNLOCKED ? 0 : req_state;
552
553 if (likely(!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
554 ret = sdp->sd_lockstruct.ls_ops->lm_lock(lock,
555 req_state, flags);
556 return ret;
557}
558
559/** 544/**
560 * do_xmote - Calls the DLM to change the state of a lock 545 * do_xmote - Calls the DLM to change the state of a lock
561 * @gl: The lock state 546 * @gl: The lock state
@@ -575,13 +560,14 @@ __acquires(&gl->gl_spin)
575 560
576 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | 561 lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
577 LM_FLAG_PRIORITY); 562 LM_FLAG_PRIORITY);
578 BUG_ON(gl->gl_state == target); 563 GLOCK_BUG_ON(gl, gl->gl_state == target);
579 BUG_ON(gl->gl_state == gl->gl_target); 564 GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
580 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) && 565 if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
581 glops->go_inval) { 566 glops->go_inval) {
582 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); 567 set_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
583 do_error(gl, 0); /* Fail queued try locks */ 568 do_error(gl, 0); /* Fail queued try locks */
584 } 569 }
570 gl->gl_req = target;
585 spin_unlock(&gl->gl_spin); 571 spin_unlock(&gl->gl_spin);
586 if (glops->go_xmote_th) 572 if (glops->go_xmote_th)
587 glops->go_xmote_th(gl); 573 glops->go_xmote_th(gl);
@@ -594,15 +580,17 @@ __acquires(&gl->gl_spin)
594 gl->gl_state == LM_ST_DEFERRED) && 580 gl->gl_state == LM_ST_DEFERRED) &&
595 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) 581 !(lck_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
596 lck_flags |= LM_FLAG_TRY_1CB; 582 lck_flags |= LM_FLAG_TRY_1CB;
597 ret = gfs2_lm_lock(sdp, gl, target, lck_flags);
598 583
599 if (!(ret & LM_OUT_ASYNC)) { 584 if (sdp->sd_lockstruct.ls_ops->lm_lock) {
600 finish_xmote(gl, ret); 585 /* lock_dlm */
586 ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
587 GLOCK_BUG_ON(gl, ret);
588 } else { /* lock_nolock */
589 finish_xmote(gl, target);
601 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 590 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
602 gfs2_glock_put(gl); 591 gfs2_glock_put(gl);
603 } else {
604 GLOCK_BUG_ON(gl, ret != LM_OUT_ASYNC);
605 } 592 }
593
606 spin_lock(&gl->gl_spin); 594 spin_lock(&gl->gl_spin);
607} 595}
608 596
@@ -686,21 +674,20 @@ static void delete_work_func(struct work_struct *work)
686{ 674{
687 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete); 675 struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
688 struct gfs2_sbd *sdp = gl->gl_sbd; 676 struct gfs2_sbd *sdp = gl->gl_sbd;
689 struct gfs2_inode *ip = NULL; 677 struct gfs2_inode *ip;
690 struct inode *inode; 678 struct inode *inode;
691 u64 no_addr = 0; 679 u64 no_addr = gl->gl_name.ln_number;
680
681 ip = gl->gl_object;
682 /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
692 683
693 spin_lock(&gl->gl_spin);
694 ip = (struct gfs2_inode *)gl->gl_object;
695 if (ip) 684 if (ip)
696 no_addr = ip->i_no_addr;
697 spin_unlock(&gl->gl_spin);
698 if (ip) {
699 inode = gfs2_ilookup(sdp->sd_vfs, no_addr); 685 inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
700 if (inode) { 686 else
701 d_prune_aliases(inode); 687 inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
702 iput(inode); 688 if (inode && !IS_ERR(inode)) {
703 } 689 d_prune_aliases(inode);
690 iput(inode);
704 } 691 }
705 gfs2_glock_put(gl); 692 gfs2_glock_put(gl);
706} 693}
@@ -952,17 +939,22 @@ int gfs2_glock_wait(struct gfs2_holder *gh)
952 939
953void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) 940void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
954{ 941{
942 struct va_format vaf;
955 va_list args; 943 va_list args;
956 944
957 va_start(args, fmt); 945 va_start(args, fmt);
946
958 if (seq) { 947 if (seq) {
959 struct gfs2_glock_iter *gi = seq->private; 948 struct gfs2_glock_iter *gi = seq->private;
960 vsprintf(gi->string, fmt, args); 949 vsprintf(gi->string, fmt, args);
961 seq_printf(seq, gi->string); 950 seq_printf(seq, gi->string);
962 } else { 951 } else {
963 printk(KERN_ERR " "); 952 vaf.fmt = fmt;
964 vprintk(fmt, args); 953 vaf.va = &args;
954
955 printk(KERN_ERR " %pV", &vaf);
965 } 956 }
957
966 va_end(args); 958 va_end(args);
967} 959}
968 960
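Editor's note: gfs2_print_dbg() now forwards its varargs to a single printk via the %pV extension and a struct va_format instead of printing a bare prefix followed by vprintk(), which keeps the log level and message on one printk call; glock.h also adds a printf format attribute so callers get compile-time format checking. A standalone sketch of the %pV idiom, with an illustrative name:

#include <linux/kernel.h>

static void example_log(const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_ERR "example: %pV", &vaf);	/* one printk, prefix preserved */
	va_end(args);
}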
@@ -1362,24 +1354,28 @@ static int gfs2_should_freeze(const struct gfs2_glock *gl)
1362 * @gl: Pointer to the glock 1354 * @gl: Pointer to the glock
1363 * @ret: The return value from the dlm 1355 * @ret: The return value from the dlm
1364 * 1356 *
1357 * The gl_reply field is under the gl_spin lock so that it is ok
1358 * to use a bitfield shared with other glock state fields.
1365 */ 1359 */
1366 1360
1367void gfs2_glock_complete(struct gfs2_glock *gl, int ret) 1361void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
1368{ 1362{
1369 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 1363 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
1370 1364
1365 spin_lock(&gl->gl_spin);
1371 gl->gl_reply = ret; 1366 gl->gl_reply = ret;
1372 1367
1373 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) { 1368 if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_flags))) {
1374 spin_lock(&gl->gl_spin);
1375 if (gfs2_should_freeze(gl)) { 1369 if (gfs2_should_freeze(gl)) {
1376 set_bit(GLF_FROZEN, &gl->gl_flags); 1370 set_bit(GLF_FROZEN, &gl->gl_flags);
1377 spin_unlock(&gl->gl_spin); 1371 spin_unlock(&gl->gl_spin);
1378 return; 1372 return;
1379 } 1373 }
1380 spin_unlock(&gl->gl_spin);
1381 } 1374 }
1375
1376 spin_unlock(&gl->gl_spin);
1382 set_bit(GLF_REPLY_PENDING, &gl->gl_flags); 1377 set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
1378 smp_wmb();
1383 gfs2_glock_hold(gl); 1379 gfs2_glock_hold(gl);
1384 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) 1380 if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
1385 gfs2_glock_put(gl); 1381 gfs2_glock_put(gl);
@@ -1627,18 +1623,17 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags)
1627static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh) 1623static int dump_holder(struct seq_file *seq, const struct gfs2_holder *gh)
1628{ 1624{
1629 struct task_struct *gh_owner = NULL; 1625 struct task_struct *gh_owner = NULL;
1630 char buffer[KSYM_SYMBOL_LEN];
1631 char flags_buf[32]; 1626 char flags_buf[32];
1632 1627
1633 sprint_symbol(buffer, gh->gh_ip);
1634 if (gh->gh_owner_pid) 1628 if (gh->gh_owner_pid)
1635 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID); 1629 gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
1636 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %s\n", 1630 gfs2_print_dbg(seq, " H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
1637 state2str(gh->gh_state), 1631 state2str(gh->gh_state),
1638 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags), 1632 hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
1639 gh->gh_error, 1633 gh->gh_error,
1640 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1, 1634 gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
1641 gh_owner ? gh_owner->comm : "(ended)", buffer); 1635 gh_owner ? gh_owner->comm : "(ended)",
1636 (void *)gh->gh_ip);
1642 return 0; 1637 return 0;
1643} 1638}
1644 1639
@@ -1783,12 +1778,13 @@ int __init gfs2_glock_init(void)
1783 } 1778 }
1784#endif 1779#endif
1785 1780
1786 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | 1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1787 WQ_HIGHPRI | WQ_FREEZEABLE, 0); 1782 WQ_HIGHPRI | WQ_FREEZEABLE, 0);
1788 if (IS_ERR(glock_workqueue)) 1783 if (IS_ERR(glock_workqueue))
1789 return PTR_ERR(glock_workqueue); 1784 return PTR_ERR(glock_workqueue);
1790 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | 1785 gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
1791 WQ_FREEZEABLE, 0); 1786 WQ_MEM_RECLAIM | WQ_FREEZEABLE,
1787 0);
1792 if (IS_ERR(gfs2_delete_workqueue)) { 1788 if (IS_ERR(gfs2_delete_workqueue)) {
1793 destroy_workqueue(glock_workqueue); 1789 destroy_workqueue(glock_workqueue);
1794 return PTR_ERR(gfs2_delete_workqueue); 1790 return PTR_ERR(gfs2_delete_workqueue);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index db1c26d6d220..691851ceb615 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -87,11 +87,10 @@ enum {
87#define GL_ASYNC 0x00000040 87#define GL_ASYNC 0x00000040
88#define GL_EXACT 0x00000080 88#define GL_EXACT 0x00000080
89#define GL_SKIP 0x00000100 89#define GL_SKIP 0x00000100
90#define GL_ATIME 0x00000200
91#define GL_NOCACHE 0x00000400 90#define GL_NOCACHE 0x00000400
92 91
93/* 92/*
94 * lm_lock() and lm_async_cb return flags 93 * lm_async_cb return flags
95 * 94 *
96 * LM_OUT_ST_MASK 95 * LM_OUT_ST_MASK
97 * Masks the lower two bits of lock state in the returned value. 96 * Masks the lower two bits of lock state in the returned value.
@@ -99,15 +98,11 @@ enum {
99 * LM_OUT_CANCELED 98 * LM_OUT_CANCELED
100 * The lock request was canceled. 99 * The lock request was canceled.
101 * 100 *
102 * LM_OUT_ASYNC
103 * The result of the request will be returned in an LM_CB_ASYNC callback.
104 *
105 */ 101 */
106 102
107#define LM_OUT_ST_MASK 0x00000003 103#define LM_OUT_ST_MASK 0x00000003
108#define LM_OUT_CANCELED 0x00000008 104#define LM_OUT_CANCELED 0x00000008
109#define LM_OUT_ASYNC 0x00000080 105#define LM_OUT_ERROR 0x00000004
110#define LM_OUT_ERROR 0x00000100
111 106
112/* 107/*
113 * lm_recovery_done() messages 108 * lm_recovery_done() messages
@@ -124,25 +119,12 @@ struct lm_lockops {
124 void (*lm_unmount) (struct gfs2_sbd *sdp); 119 void (*lm_unmount) (struct gfs2_sbd *sdp);
125 void (*lm_withdraw) (struct gfs2_sbd *sdp); 120 void (*lm_withdraw) (struct gfs2_sbd *sdp);
126 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); 121 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
127 unsigned int (*lm_lock) (struct gfs2_glock *gl, 122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
128 unsigned int req_state, unsigned int flags); 123 unsigned int flags);
129 void (*lm_cancel) (struct gfs2_glock *gl); 124 void (*lm_cancel) (struct gfs2_glock *gl);
130 const match_table_t *lm_tokens; 125 const match_table_t *lm_tokens;
131}; 126};
132 127
133#define LM_FLAG_TRY 0x00000001
134#define LM_FLAG_TRY_1CB 0x00000002
135#define LM_FLAG_NOEXP 0x00000004
136#define LM_FLAG_ANY 0x00000008
137#define LM_FLAG_PRIORITY 0x00000010
138
139#define GL_ASYNC 0x00000040
140#define GL_EXACT 0x00000080
141#define GL_SKIP 0x00000100
142#define GL_NOCACHE 0x00000400
143
144#define GLR_TRYFAILED 13
145
146extern struct workqueue_struct *gfs2_delete_workqueue; 128extern struct workqueue_struct *gfs2_delete_workqueue;
147static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) 129static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *gl)
148{ 130{
@@ -212,6 +194,8 @@ int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
212int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs); 194int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
213void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs); 195void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
214void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); 196void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
197
198__attribute__ ((format(printf, 2, 3)))
215void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); 199void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
216 200
217/** 201/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 0d149dcc04e5..263561bf1a50 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -325,7 +325,6 @@ static void trans_go_sync(struct gfs2_glock *gl)
325 325
326 if (gl->gl_state != LM_ST_UNLOCKED && 326 if (gl->gl_state != LM_ST_UNLOCKED &&
327 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { 327 test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
328 flush_workqueue(gfs2_delete_workqueue);
329 gfs2_meta_syncfs(sdp); 328 gfs2_meta_syncfs(sdp);
330 gfs2_log_shutdown(sdp); 329 gfs2_log_shutdown(sdp);
331 } 330 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 764fbb49efc8..8d3d2b4a0a7d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,12 +207,14 @@ struct gfs2_glock {
207 207
208 spinlock_t gl_spin; 208 spinlock_t gl_spin;
209 209
210 unsigned int gl_state; 210 /* State fields protected by gl_spin */
211 unsigned int gl_target; 211 unsigned int gl_state:2, /* Current state */
212 unsigned int gl_reply; 212 gl_target:2, /* Target state */
213 gl_demote_state:2, /* State requested by remote node */
214 gl_req:2, /* State in last dlm request */
215 gl_reply:8; /* Last reply from the dlm */
216
213 unsigned int gl_hash; 217 unsigned int gl_hash;
214 unsigned int gl_req;
215 unsigned int gl_demote_state; /* state requested by remote node */
216 unsigned long gl_demote_time; /* time of first demote request */ 218 unsigned long gl_demote_time; /* time of first demote request */
217 struct list_head gl_holders; 219 struct list_head gl_holders;
218 220
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 06370f8bd8cf..2232b3c780bd 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); 73 return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
74} 74}
75 75
76struct gfs2_skip_data {
77 u64 no_addr;
78 int skipped;
79};
80
81static int iget_skip_test(struct inode *inode, void *opaque)
82{
83 struct gfs2_inode *ip = GFS2_I(inode);
84 struct gfs2_skip_data *data = opaque;
85
86 if (ip->i_no_addr == data->no_addr) {
87 if (inode->i_state & (I_FREEING|I_WILL_FREE)){
88 data->skipped = 1;
89 return 0;
90 }
91 return 1;
92 }
93 return 0;
94}
95
96static int iget_skip_set(struct inode *inode, void *opaque)
97{
98 struct gfs2_inode *ip = GFS2_I(inode);
99 struct gfs2_skip_data *data = opaque;
100
101 if (data->skipped)
102 return 1;
103 inode->i_ino = (unsigned long)(data->no_addr);
104 ip->i_no_addr = data->no_addr;
105 return 0;
106}
107
108static struct inode *gfs2_iget_skip(struct super_block *sb,
109 u64 no_addr)
110{
111 struct gfs2_skip_data data;
112 unsigned long hash = (unsigned long)no_addr;
113
114 data.no_addr = no_addr;
115 data.skipped = 0;
116 return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
117}
118
119/** 76/**
120 * GFS2 lookup code fills in vfs inode contents based on info obtained 77 * GFS2 lookup code fills in vfs inode contents based on info obtained
121 * from directory entry inside gfs2_inode_lookup(). This has caused issues 78 * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -243,93 +200,54 @@ fail:
243 return ERR_PTR(error); 200 return ERR_PTR(error);
244} 201}
245 202
246/** 203struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
247 * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation 204 u64 *no_formal_ino, unsigned int blktype)
248 * and try to reclaim it by doing iput.
249 *
250 * This function assumes no rgrp locks are currently held.
251 *
252 * @sb: The super block
253 * no_addr: The inode number
254 *
255 */
256
257void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
258{ 205{
259 struct gfs2_sbd *sdp; 206 struct super_block *sb = sdp->sd_vfs;
260 struct gfs2_inode *ip; 207 struct gfs2_holder i_gh;
261 struct gfs2_glock *io_gl = NULL;
262 int error;
263 struct gfs2_holder gh;
264 struct inode *inode; 208 struct inode *inode;
209 int error;
265 210
266 inode = gfs2_iget_skip(sb, no_addr); 211 error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
267 212 LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
268 if (!inode) 213 if (error)
269 return; 214 return ERR_PTR(error);
270
271 /* If it's not a new inode, someone's using it, so leave it alone. */
272 if (!(inode->i_state & I_NEW)) {
273 iput(inode);
274 return;
275 }
276
277 ip = GFS2_I(inode);
278 sdp = GFS2_SB(inode);
279 ip->i_no_formal_ino = -1;
280 215
281 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); 216 error = gfs2_check_blk_type(sdp, no_addr, blktype);
282 if (unlikely(error)) 217 if (error)
283 goto fail; 218 goto fail;
284 ip->i_gl->gl_object = ip;
285
286 error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
287 if (unlikely(error))
288 goto fail_put;
289 219
290 set_bit(GIF_INVALID, &ip->i_flags); 220 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
291 error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT, 221 if (IS_ERR(inode))
292 &ip->i_iopen_gh); 222 goto fail;
293 if (unlikely(error))
294 goto fail_iopen;
295 223
296 ip->i_iopen_gh.gh_gl->gl_object = ip; 224 error = gfs2_inode_refresh(GFS2_I(inode));
297 gfs2_glock_put(io_gl); 225 if (error)
298 io_gl = NULL; 226 goto fail_iput;
299 227
300 inode->i_mode = DT2IF(DT_UNKNOWN); 228 /* Pick up the works we bypass in gfs2_inode_lookup */
229 if (inode->i_state & I_NEW)
230 gfs2_set_iop(inode);
301 231
302 /* 232 /* Two extra checks for NFS only */
303 * We must read the inode in order to work out its type in 233 if (no_formal_ino) {
304 * this case. Note that this doesn't happen often as we normally 234 error = -ESTALE;
305 * know the type beforehand. This code path only occurs during 235 if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
306 * unlinked inode recovery (where it is safe to do this glock, 236 goto fail_iput;
307 * which is not true in the general case).
308 */
309 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
310 &gh);
311 if (unlikely(error))
312 goto fail_glock;
313 237
314 /* Inode is now uptodate */ 238 error = -EIO;
315 gfs2_glock_dq_uninit(&gh); 239 if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
316 gfs2_set_iop(inode); 240 goto fail_iput;
317 241
318 /* The iput will cause it to be deleted. */ 242 error = 0;
319 iput(inode); 243 }
320 return;
321 244
322fail_glock:
323 gfs2_glock_dq(&ip->i_iopen_gh);
324fail_iopen:
325 if (io_gl)
326 gfs2_glock_put(io_gl);
327fail_put:
328 ip->i_gl->gl_object = NULL;
329 gfs2_glock_put(ip->i_gl);
330fail: 245fail:
331 iget_failed(inode); 246 gfs2_glock_dq_uninit(&i_gh);
332 return; 247 return error ? ERR_PTR(error) : inode;
248fail_iput:
249 iput(inode);
250 goto fail;
333} 251}
334 252
335static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) 253static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
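The new gfs2_lookup_by_inum keeps its success path falling through into the shared exit label and jumps forward (and back) only for the extra iput. A generic, standalone sketch of that unwind shape, with hypothetical acquire_a/acquire_b/do_check helpers standing in for the glock holder, the inode lookup and the NFS checks:

#include <stdio.h>

static int acquire_a(void)  { puts("acquire a"); return 0; }
static void release_a(void) { puts("release a"); }
static int acquire_b(void)  { puts("acquire b"); return 0; }
static void release_b(void) { puts("release b"); }
static int do_check(void)   { return -1; }   /* simulate a failed check */

static int lookup(void)
{
        int error;

        error = acquire_a();            /* e.g. take the shared glock */
        if (error)
                return error;

        error = acquire_b();            /* e.g. look the inode up */
        if (error)
                goto out;

        error = do_check();             /* e.g. the extra NFS-only checks */
        if (error)
                goto out_b;
out:
        release_a();                    /* success and failure share this exit */
        return error;
out_b:
        release_b();
        goto out;
}

int main(void)
{
        printf("lookup() = %d\n", lookup());
        return 0;
}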
@@ -591,7 +509,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
591 } 509 }
592 510
593 if (!is_root) { 511 if (!is_root) {
594 error = gfs2_permission(dir, MAY_EXEC); 512 error = gfs2_permission(dir, MAY_EXEC, 0);
595 if (error) 513 if (error)
596 goto out; 514 goto out;
597 } 515 }
@@ -621,7 +539,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
621{ 539{
622 int error; 540 int error;
623 541
624 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 542 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
625 if (error) 543 if (error)
626 return error; 544 return error;
627 545
@@ -998,17 +916,8 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
998 if (error) 916 if (error)
999 return error; 917 return error;
1000 918
1001 if ((attr->ia_valid & ATTR_SIZE) &&
1002 attr->ia_size != i_size_read(inode)) {
1003 error = vmtruncate(inode, attr->ia_size);
1004 if (error)
1005 return error;
1006 }
1007
1008 setattr_copy(inode, attr); 919 setattr_copy(inode, attr);
1009 mark_inode_dirty(inode); 920 mark_inode_dirty(inode);
1010
1011 gfs2_assert_warn(GFS2_SB(inode), !error);
1012 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 921 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1013 gfs2_dinode_out(ip, dibh->b_data); 922 gfs2_dinode_out(ip, dibh->b_data);
1014 brelse(dibh); 923 brelse(dibh);
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6720d7d5fbc6..732a183efdb3 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,9 @@ err:
99extern void gfs2_set_iop(struct inode *inode); 99extern void gfs2_set_iop(struct inode *inode);
100extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 100extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
101 u64 no_addr, u64 no_formal_ino); 101 u64 no_addr, u64 no_formal_ino);
102extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr); 102extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
103 u64 *no_formal_ino,
104 unsigned int blktype);
103extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); 105extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
104 106
105extern int gfs2_inode_refresh(struct gfs2_inode *ip); 107extern int gfs2_inode_refresh(struct gfs2_inode *ip);
@@ -111,7 +113,7 @@ extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
111extern struct inode *gfs2_createi(struct gfs2_holder *ghs, 113extern struct inode *gfs2_createi(struct gfs2_holder *ghs,
112 const struct qstr *name, 114 const struct qstr *name,
113 unsigned int mode, dev_t dev); 115 unsigned int mode, dev_t dev);
114extern int gfs2_permission(struct inode *inode, int mask); 116extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
115extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 117extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
116extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 118extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
117extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 119extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 1c09425b45fd..6e493aee28f8 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -146,15 +146,13 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags,
146 return lkf; 146 return lkf;
147} 147}
148 148
149static unsigned int gdlm_lock(struct gfs2_glock *gl, 149static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
150 unsigned int req_state, unsigned int flags) 150 unsigned int flags)
151{ 151{
152 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct; 152 struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
153 int error;
154 int req; 153 int req;
155 u32 lkf; 154 u32 lkf;
156 155
157 gl->gl_req = req_state;
158 req = make_mode(req_state); 156 req = make_mode(req_state);
159 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); 157 lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req);
160 158
@@ -162,13 +160,8 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
162 * Submit the actual lock request. 160 * Submit the actual lock request.
163 */ 161 */
164 162
165 error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname, 163 return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, gl->gl_strname,
166 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 164 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
167 if (error == -EAGAIN)
168 return 0;
169 if (error)
170 return LM_OUT_ERROR;
171 return LM_OUT_ASYNC;
172} 165}
173 166
174static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) 167static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 3eb1393f7b81..2aeabd4218cc 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -440,7 +440,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
440 iput(inode); 440 iput(inode);
441 return -ENOMEM; 441 return -ENOMEM;
442 } 442 }
443 dentry->d_op = &gfs2_dops; 443 d_set_d_op(dentry, &gfs2_dops);
444 *dptr = dentry; 444 *dptr = dentry;
445 return 0; 445 return 0;
446} 446}
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 12cbea7502c2..1501db4f0e6d 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -106,7 +106,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
106{ 106{
107 struct inode *inode = NULL; 107 struct inode *inode = NULL;
108 108
109 dentry->d_op = &gfs2_dops; 109 d_set_d_op(dentry, &gfs2_dops);
110 110
111 inode = gfs2_lookupi(dir, &dentry->d_name, 0); 111 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
112 if (inode && IS_ERR(inode)) 112 if (inode && IS_ERR(inode))
@@ -166,7 +166,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
166 if (error) 166 if (error)
167 goto out_child; 167 goto out_child;
168 168
169 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); 169 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
170 if (error) 170 if (error)
171 goto out_gunlock; 171 goto out_gunlock;
172 172
@@ -289,7 +289,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
289 if (IS_APPEND(&dip->i_inode)) 289 if (IS_APPEND(&dip->i_inode))
290 return -EPERM; 290 return -EPERM;
291 291
292 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC); 292 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
293 if (error) 293 if (error)
294 return error; 294 return error;
295 295
@@ -822,7 +822,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
822 } 822 }
823 } 823 }
824 } else { 824 } else {
825 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC); 825 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
826 if (error) 826 if (error)
827 goto out_gunlock; 827 goto out_gunlock;
828 828
@@ -857,7 +857,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
857 /* Check out the dir to be renamed */ 857 /* Check out the dir to be renamed */
858 858
859 if (dir_rename) { 859 if (dir_rename) {
860 error = gfs2_permission(odentry->d_inode, MAY_WRITE); 860 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
861 if (error) 861 if (error)
862 goto out_gunlock; 862 goto out_gunlock;
863 } 863 }
@@ -1041,13 +1041,17 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1041 * Returns: errno 1041 * Returns: errno
1042 */ 1042 */
1043 1043
1044int gfs2_permission(struct inode *inode, int mask) 1044int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1045{ 1045{
1046 struct gfs2_inode *ip = GFS2_I(inode); 1046 struct gfs2_inode *ip;
1047 struct gfs2_holder i_gh; 1047 struct gfs2_holder i_gh;
1048 int error; 1048 int error;
1049 int unlock = 0; 1049 int unlock = 0;
1050 1050
1051 if (flags & IPERM_FLAG_RCU)
1052 return -ECHILD;
1053
1054 ip = GFS2_I(inode);
1051 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1055 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1056 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error) 1057 if (error)
@@ -1058,7 +1062,7 @@ int gfs2_permission(struct inode *inode, int mask)
1058 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) 1062 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1059 error = -EACCES; 1063 error = -EACCES;
1060 else 1064 else
1061 error = generic_permission(inode, mask, gfs2_check_acl); 1065 error = generic_permission(inode, mask, flags, gfs2_check_acl);
1062 if (unlock) 1066 if (unlock)
1063 gfs2_glock_dq_uninit(&i_gh); 1067 gfs2_glock_dq_uninit(&i_gh);
1064 1068
@@ -1069,7 +1073,6 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1069{ 1073{
1070 struct gfs2_inode *ip = GFS2_I(inode); 1074 struct gfs2_inode *ip = GFS2_I(inode);
1071 struct gfs2_sbd *sdp = GFS2_SB(inode); 1075 struct gfs2_sbd *sdp = GFS2_SB(inode);
1072 struct buffer_head *dibh;
1073 u32 ouid, ogid, nuid, ngid; 1076 u32 ouid, ogid, nuid, ngid;
1074 int error; 1077 int error;
1075 1078
@@ -1100,25 +1103,10 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1100 if (error) 1103 if (error)
1101 goto out_gunlock_q; 1104 goto out_gunlock_q;
1102 1105
1103 error = gfs2_meta_inode_buffer(ip, &dibh); 1106 error = gfs2_setattr_simple(ip, attr);
1104 if (error) 1107 if (error)
1105 goto out_end_trans; 1108 goto out_end_trans;
1106 1109
1107 if ((attr->ia_valid & ATTR_SIZE) &&
1108 attr->ia_size != i_size_read(inode)) {
1109 int error;
1110
1111 error = vmtruncate(inode, attr->ia_size);
1112 gfs2_assert_warn(sdp, !error);
1113 }
1114
1115 setattr_copy(inode, attr);
1116 mark_inode_dirty(inode);
1117
1118 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1119 gfs2_dinode_out(ip, dibh->b_data);
1120 brelse(dibh);
1121
1122 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1110 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1123 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode); 1111 u64 blocks = gfs2_get_inode_blocks(&ip->i_inode);
1124 gfs2_quota_change(ip, -blocks, ouid, ogid); 1112 gfs2_quota_change(ip, -blocks, ouid, ogid);
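The signature change above threads the new flags argument through to generic_permission() and bails out of RCU-walk before any blocking work. A minimal kernel-style sketch of that guard for a hypothetical examplefs (not the real GFS2 code, which also takes its glock; generic_permission's four-argument form is the one visible in these hunks):

#include <linux/fs.h>

static int examplefs_permission(struct inode *inode, int mask, unsigned int flags)
{
        /* In rcu-walk mode we may not sleep or take references; tell the
         * VFS to retry the lookup in ordinary ref-walk mode instead. */
        if (flags & IPERM_FLAG_RCU)
                return -ECHILD;

        /* Blocking work (cluster locks, ACL reads, ...) is safe from here on. */
        return generic_permission(inode, mask, flags, NULL);
}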
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 58a9b9998b42..a689901963de 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
631 struct fs_disk_quota *fdq) 631 struct fs_disk_quota *fdq)
632{ 632{
633 struct inode *inode = &ip->i_inode; 633 struct inode *inode = &ip->i_inode;
634 struct gfs2_sbd *sdp = GFS2_SB(inode);
634 struct address_space *mapping = inode->i_mapping; 635 struct address_space *mapping = inode->i_mapping;
635 unsigned long index = loc >> PAGE_CACHE_SHIFT; 636 unsigned long index = loc >> PAGE_CACHE_SHIFT;
636 unsigned offset = loc & (PAGE_CACHE_SIZE - 1); 637 unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
@@ -658,13 +659,17 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
658 qd->qd_qb.qb_value = qp->qu_value; 659 qd->qd_qb.qb_value = qp->qu_value;
659 if (fdq) { 660 if (fdq) {
660 if (fdq->d_fieldmask & FS_DQ_BSOFT) { 661 if (fdq->d_fieldmask & FS_DQ_BSOFT) {
661 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit); 662 qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
662 qd->qd_qb.qb_warn = qp->qu_warn; 663 qd->qd_qb.qb_warn = qp->qu_warn;
663 } 664 }
664 if (fdq->d_fieldmask & FS_DQ_BHARD) { 665 if (fdq->d_fieldmask & FS_DQ_BHARD) {
665 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit); 666 qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
666 qd->qd_qb.qb_limit = qp->qu_limit; 667 qd->qd_qb.qb_limit = qp->qu_limit;
667 } 668 }
669 if (fdq->d_fieldmask & FS_DQ_BCOUNT) {
670 qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift);
671 qd->qd_qb.qb_value = qp->qu_value;
672 }
668 } 673 }
669 674
670 /* Write the quota into the quota file on disk */ 675 /* Write the quota into the quota file on disk */
@@ -1497,9 +1502,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
1497 fdq->d_version = FS_DQUOT_VERSION; 1502 fdq->d_version = FS_DQUOT_VERSION;
1498 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA; 1503 fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
1499 fdq->d_id = id; 1504 fdq->d_id = id;
1500 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit); 1505 fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
1501 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn); 1506 fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
1502 fdq->d_bcount = be64_to_cpu(qlvb->qb_value); 1507 fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
1503 1508
1504 gfs2_glock_dq_uninit(&q_gh); 1509 gfs2_glock_dq_uninit(&q_gh);
1505out: 1510out:
@@ -1508,7 +1513,7 @@ out:
1508} 1513}
1509 1514
1510/* GFS2 only supports a subset of the XFS fields */ 1515/* GFS2 only supports a subset of the XFS fields */
1511#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD) 1516#define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD|FS_DQ_BCOUNT)
1512 1517
1513static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, 1518static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1514 struct fs_disk_quota *fdq) 1519 struct fs_disk_quota *fdq)
@@ -1566,11 +1571,17 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1566 1571
1567 /* If nothing has changed, this is a no-op */ 1572 /* If nothing has changed, this is a no-op */
1568 if ((fdq->d_fieldmask & FS_DQ_BSOFT) && 1573 if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
1569 (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn))) 1574 ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
1570 fdq->d_fieldmask ^= FS_DQ_BSOFT; 1575 fdq->d_fieldmask ^= FS_DQ_BSOFT;
1576
1571 if ((fdq->d_fieldmask & FS_DQ_BHARD) && 1577 if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
1572 (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit))) 1578 ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
1573 fdq->d_fieldmask ^= FS_DQ_BHARD; 1579 fdq->d_fieldmask ^= FS_DQ_BHARD;
1580
1581 if ((fdq->d_fieldmask & FS_DQ_BCOUNT) &&
1582 ((fdq->d_bcount >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_value)))
1583 fdq->d_fieldmask ^= FS_DQ_BCOUNT;
1584
1574 if (fdq->d_fieldmask == 0) 1585 if (fdq->d_fieldmask == 0)
1575 goto out_i; 1586 goto out_i;
1576 1587
@@ -1619,4 +1630,3 @@ const struct quotactl_ops gfs2_quotactl_ops = {
1619 .get_dqblk = gfs2_get_dqblk, 1630 .get_dqblk = gfs2_get_dqblk,
1620 .set_dqblk = gfs2_set_dqblk, 1631 .set_dqblk = gfs2_set_dqblk,
1621}; 1632};
1622
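The quota hunks above convert between the 512-byte basic blocks used by the XFS-style quota interface and GFS2's own filesystem blocks: values are shifted right by sd_fsb2bb_shift when stored and left when reported, and the no-op test compares in on-disk units. A standalone arithmetic sketch of that round trip (the 4 KiB block size is an assumption for illustration only):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        unsigned int block_size = 4096;          /* assumed fs block size */
        unsigned int fsb2bb_shift = 0;

        while ((512u << fsb2bb_shift) < block_size)
                fsb2bb_shift++;                  /* 4096 / 512 = 8 -> shift of 3 */

        uint64_t soft_bb  = 1000000;             /* limit given in 512-byte blocks */
        uint64_t on_disk  = soft_bb >> fsb2bb_shift;   /* stored in fs blocks */
        uint64_t reported = on_disk << fsb2bb_shift;   /* reported back in 512-byte blocks */

        printf("shift=%u on_disk=%llu reported=%llu\n", fsb2bb_shift,
               (unsigned long long)on_disk, (unsigned long long)reported);
        return 0;
}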
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bef3ab6cf5c1..7293ea27020c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
500 for (rgrps = 0;; rgrps++) { 500 for (rgrps = 0;; rgrps++) {
501 loff_t pos = rgrps * sizeof(struct gfs2_rindex); 501 loff_t pos = rgrps * sizeof(struct gfs2_rindex);
502 502
503 if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) 503 if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))
504 break; 504 break;
505 error = gfs2_internal_read(ip, &ra_state, buf, &pos, 505 error = gfs2_internal_read(ip, &ra_state, buf, &pos,
506 sizeof(struct gfs2_rindex)); 506 sizeof(struct gfs2_rindex));
@@ -583,7 +583,7 @@ static int read_rindex_entry(struct gfs2_inode *ip,
583 * Returns: 0 on successful update, error code otherwise 583 * Returns: 0 on successful update, error code otherwise
584 */ 584 */
585 585
586static int gfs2_ri_update(struct gfs2_inode *ip) 586int gfs2_ri_update(struct gfs2_inode *ip)
587{ 587{
588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 588 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
589 struct inode *inode = &ip->i_inode; 589 struct inode *inode = &ip->i_inode;
@@ -614,46 +614,6 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
614} 614}
615 615
616/** 616/**
617 * gfs2_ri_update_special - Pull in a new resource index from the disk
618 *
619 * This is a special version that's safe to call from gfs2_inplace_reserve_i.
620 * In this case we know that we don't have any resource groups in memory yet.
621 *
622 * @ip: pointer to the rindex inode
623 *
624 * Returns: 0 on successful update, error code otherwise
625 */
626static int gfs2_ri_update_special(struct gfs2_inode *ip)
627{
628 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
629 struct inode *inode = &ip->i_inode;
630 struct file_ra_state ra_state;
631 struct gfs2_rgrpd *rgd;
632 unsigned int max_data = 0;
633 int error;
634
635 file_ra_state_init(&ra_state, inode->i_mapping);
636 for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) {
637 /* Ignore partials */
638 if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) >
639 i_size_read(inode))
640 break;
641 error = read_rindex_entry(ip, &ra_state);
642 if (error) {
643 clear_rgrpdi(sdp);
644 return error;
645 }
646 }
647 list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
648 if (rgd->rd_data > max_data)
649 max_data = rgd->rd_data;
650 sdp->sd_max_rg_data = max_data;
651
652 sdp->sd_rindex_uptodate = 1;
653 return 0;
654}
655
656/**
657 * gfs2_rindex_hold - Grab a lock on the rindex 617 * gfs2_rindex_hold - Grab a lock on the rindex
658 * @sdp: The GFS2 superblock 618 * @sdp: The GFS2 superblock
659 * @ri_gh: the glock holder 619 * @ri_gh: the glock holder
@@ -963,17 +923,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
963 * The inode, if one has been found, in inode. 923 * The inode, if one has been found, in inode.
964 */ 924 */
965 925
966static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, 926static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
967 u64 skip)
968{ 927{
969 u32 goal = 0, block; 928 u32 goal = 0, block;
970 u64 no_addr; 929 u64 no_addr;
971 struct gfs2_sbd *sdp = rgd->rd_sbd; 930 struct gfs2_sbd *sdp = rgd->rd_sbd;
972 unsigned int n; 931 unsigned int n;
932 struct gfs2_glock *gl;
933 struct gfs2_inode *ip;
934 int error;
935 int found = 0;
973 936
974 for(;;) { 937 while (goal < rgd->rd_data) {
975 if (goal >= rgd->rd_data)
976 break;
977 down_write(&sdp->sd_log_flush_lock); 938 down_write(&sdp->sd_log_flush_lock);
978 n = 1; 939 n = 1;
979 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, 940 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
@@ -990,11 +951,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
990 if (no_addr == skip) 951 if (no_addr == skip)
991 continue; 952 continue;
992 *last_unlinked = no_addr; 953 *last_unlinked = no_addr;
993 return no_addr; 954
955 error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
956 if (error)
957 continue;
958
959 /* If the inode is already in cache, we can ignore it here
960 * because the existing inode disposal code will deal with
961 * it when all refs have gone away. Accessing gl_object like
962 * this is not safe in general. Here it is ok because we do
963 * not dereference the pointer, and we only need an approx
964 * answer to whether it is NULL or not.
965 */
966 ip = gl->gl_object;
967
968 if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
969 gfs2_glock_put(gl);
970 else
971 found++;
972
973 /* Limit reclaim to sensible number of tasks */
974 if (found > 2*NR_CPUS)
975 return;
994 } 976 }
995 977
996 rgd->rd_flags &= ~GFS2_RDF_CHECK; 978 rgd->rd_flags &= ~GFS2_RDF_CHECK;
997 return 0; 979 return;
998} 980}
999 981
1000/** 982/**
@@ -1075,11 +1057,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
1075 * Try to acquire rgrp in way which avoids contending with others. 1057 * Try to acquire rgrp in way which avoids contending with others.
1076 * 1058 *
1077 * Returns: errno 1059 * Returns: errno
1078 * unlinked: the block address of an unlinked block to be reclaimed
1079 */ 1060 */
1080 1061
1081static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, 1062static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1082 u64 *last_unlinked)
1083{ 1063{
1084 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1064 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1085 struct gfs2_rgrpd *rgd, *begin = NULL; 1065 struct gfs2_rgrpd *rgd, *begin = NULL;
@@ -1089,7 +1069,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1089 int loops = 0; 1069 int loops = 0;
1090 int error, rg_locked; 1070 int error, rg_locked;
1091 1071
1092 *unlinked = 0;
1093 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); 1072 rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
1094 1073
1095 while (rgd) { 1074 while (rgd) {
@@ -1106,17 +1085,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1106 case 0: 1085 case 0:
1107 if (try_rgrp_fit(rgd, al)) 1086 if (try_rgrp_fit(rgd, al))
1108 goto out; 1087 goto out;
1109 /* If the rg came in already locked, there's no 1088 if (rgd->rd_flags & GFS2_RDF_CHECK)
1110 way we can recover from a failed try_rgrp_unlink 1089 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1111 because that would require an iput which can only
1112 happen after the rgrp is unlocked. */
1113 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
1114 *unlinked = try_rgrp_unlink(rgd, last_unlinked,
1115 ip->i_no_addr);
1116 if (!rg_locked) 1090 if (!rg_locked)
1117 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1091 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1118 if (*unlinked)
1119 return -EAGAIN;
1120 /* fall through */ 1092 /* fall through */
1121 case GLR_TRYFAILED: 1093 case GLR_TRYFAILED:
1122 rgd = recent_rgrp_next(rgd); 1094 rgd = recent_rgrp_next(rgd);
@@ -1145,13 +1117,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
1145 case 0: 1117 case 0:
1146 if (try_rgrp_fit(rgd, al)) 1118 if (try_rgrp_fit(rgd, al))
1147 goto out; 1119 goto out;
1148 if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) 1120 if (rgd->rd_flags & GFS2_RDF_CHECK)
1149 *unlinked = try_rgrp_unlink(rgd, last_unlinked, 1121 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1150 ip->i_no_addr);
1151 if (!rg_locked) 1122 if (!rg_locked)
1152 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1123 gfs2_glock_dq_uninit(&al->al_rgd_gh);
1153 if (*unlinked)
1154 return -EAGAIN;
1155 break; 1124 break;
1156 1125
1157 case GLR_TRYFAILED: 1126 case GLR_TRYFAILED:
@@ -1204,12 +1173,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
1204 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1173 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1205 struct gfs2_alloc *al = ip->i_alloc; 1174 struct gfs2_alloc *al = ip->i_alloc;
1206 int error = 0; 1175 int error = 0;
1207 u64 last_unlinked = NO_BLOCK, unlinked; 1176 u64 last_unlinked = NO_BLOCK;
1177 int tries = 0;
1208 1178
1209 if (gfs2_assert_warn(sdp, al->al_requested)) 1179 if (gfs2_assert_warn(sdp, al->al_requested))
1210 return -EINVAL; 1180 return -EINVAL;
1211 1181
1212try_again:
1213 if (hold_rindex) { 1182 if (hold_rindex) {
1214 /* We need to hold the rindex unless the inode we're using is 1183 /* We need to hold the rindex unless the inode we're using is
1215 the rindex itself, in which case it's already held. */ 1184 the rindex itself, in which case it's already held. */
@@ -1217,32 +1186,33 @@ try_again:
1217 error = gfs2_rindex_hold(sdp, &al->al_ri_gh); 1186 error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
1218 else if (!sdp->sd_rgrps) /* We may not have the rindex read 1187 else if (!sdp->sd_rgrps) /* We may not have the rindex read
1219 in, so: */ 1188 in, so: */
1220 error = gfs2_ri_update_special(ip); 1189 error = gfs2_ri_update(ip);
1190 if (error)
1191 return error;
1221 } 1192 }
1222 1193
1223 if (error) 1194try_again:
1224 return error; 1195 do {
1196 error = get_local_rgrp(ip, &last_unlinked);
1197 /* If there is no space, flushing the log may release some */
1198 if (error) {
1199 if (ip == GFS2_I(sdp->sd_rindex) &&
1200 !sdp->sd_rindex_uptodate) {
1201 error = gfs2_ri_update(ip);
1202 if (error)
1203 return error;
1204 goto try_again;
1205 }
1206 gfs2_log_flush(sdp, NULL);
1207 }
1208 } while (error && tries++ < 3);
1225 1209
1226 /* Find an rgrp suitable for allocation. If it encounters any unlinked
1227 dinodes along the way, error will equal -EAGAIN and unlinked will
1228 contains it block address. We then need to look up that inode and
1229 try to free it, and try the allocation again. */
1230 error = get_local_rgrp(ip, &unlinked, &last_unlinked);
1231 if (error) { 1210 if (error) {
1232 if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) 1211 if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
1233 gfs2_glock_dq_uninit(&al->al_ri_gh); 1212 gfs2_glock_dq_uninit(&al->al_ri_gh);
1234 if (error != -EAGAIN) 1213 return error;
1235 return error;
1236
1237 gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
1238 /* regardless of whether or not gfs2_process_unlinked_inode
1239 was successful, we don't want to repeat it again. */
1240 last_unlinked = unlinked;
1241 gfs2_log_flush(sdp, NULL);
1242 error = 0;
1243
1244 goto try_again;
1245 } 1214 }
1215
1246 /* no error, so we have the rgrp set in the inode's allocation. */ 1216 /* no error, so we have the rgrp set in the inode's allocation. */
1247 al->al_file = file; 1217 al->al_file = file;
1248 al->al_line = line; 1218 al->al_line = line;
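The reworked gfs2_inplace_reserve_i above replaces the -EAGAIN/goto dance with a bounded retry loop that flushes the log between attempts. Because tries is post-incremented inside the loop condition, the body runs at most four times: one initial attempt plus three retries, as this standalone sketch of the same loop shape shows:

#include <stdio.h>

int main(void)
{
        int tries = 0, attempts = 0, error;

        do {
                attempts++;
                error = -1;      /* pretend every allocation attempt fails */
                /* the real code would flush the log here before retrying */
        } while (error && tries++ < 3);

        printf("attempts = %d\n", attempts);   /* prints 4 */
        return 0;
}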
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 0e35c0466f9a..50c2bb04369c 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -48,6 +48,7 @@ extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
48 48
49extern void gfs2_inplace_release(struct gfs2_inode *ip); 49extern void gfs2_inplace_release(struct gfs2_inode *ip);
50 50
51extern int gfs2_ri_update(struct gfs2_inode *ip);
51extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
52extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
53 54
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 2b2c4997430b..16c2ecac7eb7 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1405,11 +1405,18 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
1405 return &ip->i_inode; 1405 return &ip->i_inode;
1406} 1406}
1407 1407
1408static void gfs2_destroy_inode(struct inode *inode) 1408static void gfs2_i_callback(struct rcu_head *head)
1409{ 1409{
1410 struct inode *inode = container_of(head, struct inode, i_rcu);
1411 INIT_LIST_HEAD(&inode->i_dentry);
1410 kmem_cache_free(gfs2_inode_cachep, inode); 1412 kmem_cache_free(gfs2_inode_cachep, inode);
1411} 1413}
1412 1414
1415static void gfs2_destroy_inode(struct inode *inode)
1416{
1417 call_rcu(&inode->i_rcu, gfs2_i_callback);
1418}
1419
1413const struct super_operations gfs2_super_ops = { 1420const struct super_operations gfs2_super_ops = {
1414 .alloc_inode = gfs2_alloc_inode, 1421 .alloc_inode = gfs2_alloc_inode,
1415 .destroy_inode = gfs2_destroy_inode, 1422 .destroy_inode = gfs2_destroy_inode,
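This destroy_inode-to-call_rcu conversion is repeated almost verbatim for hfs, hfsplus, hostfs, hpfs, hppfs and hugetlbfs further down, and for the default path in fs/inode.c. A minimal kernel-style sketch of the shared pattern (examplefs_* names and the slab cache are hypothetical placeholders; the struct inode i_rcu head is what the new code relies on):

#include <linux/fs.h>
#include <linux/slab.h>

static struct kmem_cache *examplefs_inode_cachep;   /* hypothetical slab cache */

static void examplefs_i_callback(struct rcu_head *head)
{
        struct inode *inode = container_of(head, struct inode, i_rcu);

        /* every i_callback in this series resets the alias list head
         * before handing the memory back to the slab */
        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(examplefs_inode_cachep, inode);
}

static void examplefs_destroy_inode(struct inode *inode)
{
        /* defer the free past an RCU grace period, so lock-free
         * (rcu-walk) lookups can still dereference the inode safely */
        call_rcu(&inode->i_rcu, examplefs_i_callback);
}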
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 30b58f07c8a6..439b61c03262 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1296,10 +1296,8 @@ fail:
1296 1296
1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data) 1297int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1298{ 1298{
1299 struct inode *inode = &ip->i_inode;
1300 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1299 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1301 struct gfs2_ea_location el; 1300 struct gfs2_ea_location el;
1302 struct buffer_head *dibh;
1303 int error; 1301 int error;
1304 1302
1305 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el); 1303 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, GFS2_POSIX_ACL_ACCESS, &el);
@@ -1321,26 +1319,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
1321 if (error) 1319 if (error)
1322 return error; 1320 return error;
1323 1321
1324 error = gfs2_meta_inode_buffer(ip, &dibh); 1322 error = gfs2_setattr_simple(ip, attr);
1325 if (error)
1326 goto out_trans_end;
1327
1328 if ((attr->ia_valid & ATTR_SIZE) &&
1329 attr->ia_size != i_size_read(inode)) {
1330 int error;
1331
1332 error = vmtruncate(inode, attr->ia_size);
1333 gfs2_assert_warn(GFS2_SB(inode), !error);
1334 }
1335
1336 setattr_copy(inode, attr);
1337 mark_inode_dirty(inode);
1338
1339 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1340 gfs2_dinode_out(ip, dibh->b_data);
1341 brelse(dibh);
1342
1343out_trans_end:
1344 gfs2_trans_end(sdp); 1323 gfs2_trans_end(sdp);
1345 return error; 1324 return error;
1346} 1325}
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 2b3b8611b41b..ea4aefe7c652 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -25,7 +25,7 @@ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
25 struct inode *inode = NULL; 25 struct inode *inode = NULL;
26 int res; 26 int res;
27 27
28 dentry->d_op = &hfs_dentry_operations; 28 d_set_d_op(dentry, &hfs_dentry_operations);
29 29
30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd); 30 hfs_find_init(HFS_SB(dir->i_sb)->cat_tree, &fd);
31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name); 31 hfs_cat_build_key(dir->i_sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index c8cffb81e849..ad97c2d58287 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -213,10 +213,14 @@ extern int hfs_part_find(struct super_block *, sector_t *, sector_t *);
213/* string.c */ 213/* string.c */
214extern const struct dentry_operations hfs_dentry_operations; 214extern const struct dentry_operations hfs_dentry_operations;
215 215
216extern int hfs_hash_dentry(struct dentry *, struct qstr *); 216extern int hfs_hash_dentry(const struct dentry *, const struct inode *,
217 struct qstr *);
217extern int hfs_strcmp(const unsigned char *, unsigned int, 218extern int hfs_strcmp(const unsigned char *, unsigned int,
218 const unsigned char *, unsigned int); 219 const unsigned char *, unsigned int);
219extern int hfs_compare_dentry(struct dentry *, struct qstr *, struct qstr *); 220extern int hfs_compare_dentry(const struct dentry *parent,
221 const struct inode *pinode,
222 const struct dentry *dentry, const struct inode *inode,
223 unsigned int len, const char *str, const struct qstr *name);
220 224
221/* trans.c */ 225/* trans.c */
222extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); 226extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
diff --git a/fs/hfs/string.c b/fs/hfs/string.c
index 927a5af79428..495a976a3cc9 100644
--- a/fs/hfs/string.c
+++ b/fs/hfs/string.c
@@ -51,7 +51,8 @@ static unsigned char caseorder[256] = {
51/* 51/*
52 * Hash a string to an integer in a case-independent way 52 * Hash a string to an integer in a case-independent way
53 */ 53 */
54int hfs_hash_dentry(struct dentry *dentry, struct qstr *this) 54int hfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
55 struct qstr *this)
55{ 56{
56 const unsigned char *name = this->name; 57 const unsigned char *name = this->name;
57 unsigned int hash, len = this->len; 58 unsigned int hash, len = this->len;
@@ -92,21 +93,21 @@ int hfs_strcmp(const unsigned char *s1, unsigned int len1,
92 * Test for equality of two strings in the HFS filename character ordering. 93 * Test for equality of two strings in the HFS filename character ordering.
93 * return 1 on failure and 0 on success 94 * return 1 on failure and 0 on success
94 */ 95 */
95int hfs_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr *s2) 96int hfs_compare_dentry(const struct dentry *parent, const struct inode *pinode,
97 const struct dentry *dentry, const struct inode *inode,
98 unsigned int len, const char *str, const struct qstr *name)
96{ 99{
97 const unsigned char *n1, *n2; 100 const unsigned char *n1, *n2;
98 int len;
99 101
100 len = s1->len;
101 if (len >= HFS_NAMELEN) { 102 if (len >= HFS_NAMELEN) {
102 if (s2->len < HFS_NAMELEN) 103 if (name->len < HFS_NAMELEN)
103 return 1; 104 return 1;
104 len = HFS_NAMELEN; 105 len = HFS_NAMELEN;
105 } else if (len != s2->len) 106 } else if (len != name->len)
106 return 1; 107 return 1;
107 108
108 n1 = s1->name; 109 n1 = str;
109 n2 = s2->name; 110 n2 = name->name;
110 while (len--) { 111 while (len--) {
111 if (caseorder[*n1++] != caseorder[*n2++]) 112 if (caseorder[*n1++] != caseorder[*n2++])
112 return 1; 113 return 1;
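The new d_hash/d_compare prototypes above take const dentry and inode pointers and receive the candidate name as an explicit (len, str) pair so they can run during rcu-walk. A hypothetical examplefs wiring that uses the same signatures; the exact-match compare is only for illustration, hfs applies its own case ordering:

#include <linux/dcache.h>
#include <linux/string.h>

static int examplefs_d_hash(const struct dentry *dentry,
                            const struct inode *inode, struct qstr *this)
{
        /* a case-insensitive fs would fold 'this' into a canonical hash here */
        return 0;
}

static int examplefs_d_compare(const struct dentry *parent,
                               const struct inode *pinode,
                               const struct dentry *dentry,
                               const struct inode *inode,
                               unsigned int len, const char *str,
                               const struct qstr *name)
{
        if (len != name->len)
                return 1;                       /* 1 means "does not match" */
        return memcmp(str, name->name, len) ? 1 : 0;
}

static const struct dentry_operations examplefs_dentry_operations = {
        .d_hash    = examplefs_d_hash,
        .d_compare = examplefs_d_compare,
};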
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 4824c27cebb8..0bef62aa4f42 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -167,11 +167,18 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
167 return i ? &i->vfs_inode : NULL; 167 return i ? &i->vfs_inode : NULL;
168} 168}
169 169
170static void hfs_destroy_inode(struct inode *inode) 170static void hfs_i_callback(struct rcu_head *head)
171{ 171{
172 struct inode *inode = container_of(head, struct inode, i_rcu);
173 INIT_LIST_HEAD(&inode->i_dentry);
172 kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); 174 kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
173} 175}
174 176
177static void hfs_destroy_inode(struct inode *inode)
178{
179 call_rcu(&inode->i_rcu, hfs_i_callback);
180}
181
175static const struct super_operations hfs_super_operations = { 182static const struct super_operations hfs_super_operations = {
176 .alloc_inode = hfs_alloc_inode, 183 .alloc_inode = hfs_alloc_inode,
177 .destroy_inode = hfs_destroy_inode, 184 .destroy_inode = hfs_destroy_inode,
@@ -427,7 +434,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
427 if (!sb->s_root) 434 if (!sb->s_root)
428 goto bail_iput; 435 goto bail_iput;
429 436
430 sb->s_root->d_op = &hfs_dentry_operations; 437 d_set_d_op(sb->s_root, &hfs_dentry_operations);
431 438
432 /* everything's okay */ 439 /* everything's okay */
433 return 0; 440 return 0;
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 7478f5c219aa..19cf291eb91f 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -8,15 +8,20 @@
8 * This file contains the code to do various system dependent things. 8 * This file contains the code to do various system dependent things.
9 */ 9 */
10 10
11#include <linux/namei.h>
11#include "hfs_fs.h" 12#include "hfs_fs.h"
12 13
13/* dentry case-handling: just lowercase everything */ 14/* dentry case-handling: just lowercase everything */
14 15
15static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) 16static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd)
16{ 17{
17 struct inode *inode = dentry->d_inode; 18 struct inode *inode;
18 int diff; 19 int diff;
19 20
21 if (nd->flags & LOOKUP_RCU)
22 return -ECHILD;
23
24 inode = dentry->d_inode;
20 if(!inode) 25 if(!inode)
21 return 1; 26 return 1;
22 27
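d_revalidate gets the same treatment as ->permission(): when invoked in rcu-walk mode it must not block, so it returns -ECHILD and lets the VFS retry in ref-walk mode. A minimal hypothetical sketch of that guard:

#include <linux/namei.h>

static int examplefs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
        if (nd->flags & LOOKUP_RCU)
                return -ECHILD;         /* cannot sleep here; retry in ref-walk */

        /* blocking revalidation work goes below this point */
        return 1;                       /* 1 == dentry is still valid */
}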
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index f611d55c9f5e..f896dc843026 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -37,7 +37,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
37 37
38 sb = dir->i_sb; 38 sb = dir->i_sb;
39 39
40 dentry->d_op = &hfsplus_dentry_operations; 40 d_set_d_op(dentry, &hfsplus_dentry_operations);
41 dentry->d_fsdata = NULL; 41 dentry->d_fsdata = NULL;
42 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 42 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
43 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); 43 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index f7cbdf89ac9b..d6857523336d 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -426,9 +426,12 @@ int hfsplus_uni2asc(struct super_block *,
426 const struct hfsplus_unistr *, char *, int *); 426 const struct hfsplus_unistr *, char *, int *);
427int hfsplus_asc2uni(struct super_block *, 427int hfsplus_asc2uni(struct super_block *,
428 struct hfsplus_unistr *, const char *, int); 428 struct hfsplus_unistr *, const char *, int);
429int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str); 429int hfsplus_hash_dentry(const struct dentry *dentry,
430int hfsplus_compare_dentry(struct dentry *dentry, 430 const struct inode *inode, struct qstr *str);
431 struct qstr *s1, struct qstr *s2); 431int hfsplus_compare_dentry(const struct dentry *parent,
432 const struct inode *pinode,
433 const struct dentry *dentry, const struct inode *inode,
434 unsigned int len, const char *str, const struct qstr *name);
432 435
433/* wrapper.c */ 436/* wrapper.c */
434int hfsplus_read_wrapper(struct super_block *); 437int hfsplus_read_wrapper(struct super_block *);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 3c9f30e9cd36..6ee6ad20acf2 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -450,7 +450,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
450 err = -ENOMEM; 450 err = -ENOMEM;
451 goto cleanup; 451 goto cleanup;
452 } 452 }
453 sb->s_root->d_op = &hfsplus_dentry_operations; 453 d_set_d_op(sb->s_root, &hfsplus_dentry_operations);
454 454
455 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 455 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
456 str.name = HFSP_HIDDENDIR_NAME; 456 str.name = HFSP_HIDDENDIR_NAME;
@@ -516,11 +516,19 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb)
516 return i ? &i->vfs_inode : NULL; 516 return i ? &i->vfs_inode : NULL;
517} 517}
518 518
519static void hfsplus_destroy_inode(struct inode *inode) 519static void hfsplus_i_callback(struct rcu_head *head)
520{ 520{
521 struct inode *inode = container_of(head, struct inode, i_rcu);
522
523 INIT_LIST_HEAD(&inode->i_dentry);
521 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); 524 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
522} 525}
523 526
527static void hfsplus_destroy_inode(struct inode *inode)
528{
529 call_rcu(&inode->i_rcu, hfsplus_i_callback);
530}
531
524#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) 532#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
525 533
526static struct dentry *hfsplus_mount(struct file_system_type *fs_type, 534static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index 7dd90a540546..a3f0bfcc881e 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -324,7 +324,8 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
324 * Composed unicode characters are decomposed and case-folding is performed 324 * Composed unicode characters are decomposed and case-folding is performed
325 * if the appropriate bits are (un)set on the superblock. 325 * if the appropriate bits are (un)set on the superblock.
326 */ 326 */
327int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) 327int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
328 struct qstr *str)
328{ 329{
329 struct super_block *sb = dentry->d_sb; 330 struct super_block *sb = dentry->d_sb;
330 const char *astr; 331 const char *astr;
@@ -367,10 +368,12 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str)
367 * Composed unicode characters are decomposed and case-folding is performed 368 * Composed unicode characters are decomposed and case-folding is performed
368 * if the appropriate bits are (un)set on the superblock. 369 * if the appropriate bits are (un)set on the superblock.
369 */ 370 */
370int hfsplus_compare_dentry(struct dentry *dentry, 371int hfsplus_compare_dentry(const struct dentry *parent,
371 struct qstr *s1, struct qstr *s2) 372 const struct inode *pinode,
373 const struct dentry *dentry, const struct inode *inode,
374 unsigned int len, const char *str, const struct qstr *name)
372{ 375{
373 struct super_block *sb = dentry->d_sb; 376 struct super_block *sb = parent->d_sb;
374 int casefold, decompose, size; 377 int casefold, decompose, size;
375 int dsize1, dsize2, len1, len2; 378 int dsize1, dsize2, len1, len2;
376 const u16 *dstr1, *dstr2; 379 const u16 *dstr1, *dstr2;
@@ -380,10 +383,10 @@ int hfsplus_compare_dentry(struct dentry *dentry,
380 383
381 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 384 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
382 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 385 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
383 astr1 = s1->name; 386 astr1 = str;
384 len1 = s1->len; 387 len1 = len;
385 astr2 = s2->name; 388 astr2 = name->name;
386 len2 = s2->len; 389 len2 = name->len;
387 dsize1 = dsize2 = 0; 390 dsize1 = dsize2 = 0;
388 dstr1 = dstr2 = NULL; 391 dstr1 = dstr2 = NULL;
389 392
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2c0f148a49e6..d3244d949a4e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -32,7 +32,7 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
32 32
33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode) 33#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_path.dentry->d_inode)
34 34
35static int hostfs_d_delete(struct dentry *dentry) 35static int hostfs_d_delete(const struct dentry *dentry)
36{ 36{
37 return 1; 37 return 1;
38} 38}
@@ -92,12 +92,10 @@ __uml_setup("hostfs=", hostfs_args,
92 92
93static char *__dentry_name(struct dentry *dentry, char *name) 93static char *__dentry_name(struct dentry *dentry, char *name)
94{ 94{
95 char *p = __dentry_path(dentry, name, PATH_MAX); 95 char *p = dentry_path_raw(dentry, name, PATH_MAX);
96 char *root; 96 char *root;
97 size_t len; 97 size_t len;
98 98
99 spin_unlock(&dcache_lock);
100
101 root = dentry->d_sb->s_fs_info; 99 root = dentry->d_sb->s_fs_info;
102 len = strlen(root); 100 len = strlen(root);
103 if (IS_ERR(p)) { 101 if (IS_ERR(p)) {
@@ -123,25 +121,23 @@ static char *dentry_name(struct dentry *dentry)
123 if (!name) 121 if (!name)
124 return NULL; 122 return NULL;
125 123
126 spin_lock(&dcache_lock);
127 return __dentry_name(dentry, name); /* will unlock */ 124 return __dentry_name(dentry, name); /* will unlock */
128} 125}
129 126
130static char *inode_name(struct inode *ino) 127static char *inode_name(struct inode *ino)
131{ 128{
132 struct dentry *dentry; 129 struct dentry *dentry;
133 char *name = __getname(); 130 char *name;
134 if (!name)
135 return NULL;
136 131
137 spin_lock(&dcache_lock); 132 dentry = d_find_alias(ino);
138 if (list_empty(&ino->i_dentry)) { 133 if (!dentry)
139 spin_unlock(&dcache_lock);
140 __putname(name);
141 return NULL; 134 return NULL;
142 } 135
143 dentry = list_first_entry(&ino->i_dentry, struct dentry, d_alias); 136 name = dentry_name(dentry);
144 return __dentry_name(dentry, name); /* will unlock */ 137
138 dput(dentry);
139
140 return name;
145} 141}
146 142
147static char *follow_link(char *link) 143static char *follow_link(char *link)
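Instead of peeking at ino->i_dentry under dcache_lock, the new inode_name() borrows an alias with d_find_alias() and drops it with dput(). A kernel-style sketch of that borrow pattern in isolation (dentry_name is hostfs's own helper from the hunk above):

static char *name_for(struct inode *inode)
{
        struct dentry *alias;
        char *name;

        alias = d_find_alias(inode);    /* takes a reference on one alias, or NULL */
        if (!alias)
                return NULL;

        name = dentry_name(alias);      /* safe to use the dentry while we hold it */
        dput(alias);                    /* drop the reference; no dcache_lock needed */

        return name;
}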
@@ -251,11 +247,18 @@ static void hostfs_evict_inode(struct inode *inode)
251 } 247 }
252} 248}
253 249
254static void hostfs_destroy_inode(struct inode *inode) 250static void hostfs_i_callback(struct rcu_head *head)
255{ 251{
252 struct inode *inode = container_of(head, struct inode, i_rcu);
253 INIT_LIST_HEAD(&inode->i_dentry);
256 kfree(HOSTFS_I(inode)); 254 kfree(HOSTFS_I(inode));
257} 255}
258 256
257static void hostfs_destroy_inode(struct inode *inode)
258{
259 call_rcu(&inode->i_rcu, hostfs_i_callback);
260}
261
259static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 262static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
260{ 263{
261 const char *root_path = vfs->mnt_sb->s_fs_info; 264 const char *root_path = vfs->mnt_sb->s_fs_info;
@@ -609,7 +612,7 @@ struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
609 goto out_put; 612 goto out_put;
610 613
611 d_add(dentry, inode); 614 d_add(dentry, inode);
612 dentry->d_op = &hostfs_dentry_ops; 615 d_set_d_op(dentry, &hostfs_dentry_ops);
613 return NULL; 616 return NULL;
614 617
615 out_put: 618 out_put:
@@ -746,11 +749,14 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
746 return err; 749 return err;
747} 750}
748 751
749int hostfs_permission(struct inode *ino, int desired) 752int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
750{ 753{
751 char *name; 754 char *name;
752 int r = 0, w = 0, x = 0, err; 755 int r = 0, w = 0, x = 0, err;
753 756
757 if (flags & IPERM_FLAG_RCU)
758 return -ECHILD;
759
754 if (desired & MAY_READ) r = 1; 760 if (desired & MAY_READ) r = 1;
755 if (desired & MAY_WRITE) w = 1; 761 if (desired & MAY_WRITE) w = 1;
756 if (desired & MAY_EXEC) x = 1; 762 if (desired & MAY_EXEC) x = 1;
@@ -765,7 +771,7 @@ int hostfs_permission(struct inode *ino, int desired)
765 err = access_file(name, r, w, x); 771 err = access_file(name, r, w, x);
766 __putname(name); 772 __putname(name);
767 if (!err) 773 if (!err)
768 err = generic_permission(ino, desired, NULL); 774 err = generic_permission(ino, desired, flags, NULL);
769 return err; 775 return err;
770} 776}
771 777
diff --git a/fs/hpfs/dentry.c b/fs/hpfs/dentry.c
index 67d9d36b3d5f..32c13a94e1e9 100644
--- a/fs/hpfs/dentry.c
+++ b/fs/hpfs/dentry.c
@@ -12,7 +12,8 @@
12 * Note: the dentry argument is the parent dentry. 12 * Note: the dentry argument is the parent dentry.
13 */ 13 */
14 14
15static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr) 15static int hpfs_hash_dentry(const struct dentry *dentry, const struct inode *inode,
16 struct qstr *qstr)
16{ 17{
17 unsigned long hash; 18 unsigned long hash;
18 int i; 19 int i;
@@ -34,19 +35,25 @@ static int hpfs_hash_dentry(struct dentry *dentry, struct qstr *qstr)
34 return 0; 35 return 0;
35} 36}
36 37
37static int hpfs_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 38static int hpfs_compare_dentry(const struct dentry *parent,
39 const struct inode *pinode,
40 const struct dentry *dentry, const struct inode *inode,
41 unsigned int len, const char *str, const struct qstr *name)
38{ 42{
39 unsigned al=a->len; 43 unsigned al = len;
40 unsigned bl=b->len; 44 unsigned bl = name->len;
41 hpfs_adjust_length(a->name, &al); 45
46 hpfs_adjust_length(str, &al);
42 /*hpfs_adjust_length(b->name, &bl);*/ 47 /*hpfs_adjust_length(b->name, &bl);*/
43 /* 'a' is the qstr of an already existing dentry, so the name 48
44 * must be valid. 'b' must be validated first. 49 /*
50 * 'str' is the nane of an already existing dentry, so the name
51 * must be valid. 'name' must be validated first.
45 */ 52 */
46 53
47 if (hpfs_chk_name(b->name, &bl)) 54 if (hpfs_chk_name(name->name, &bl))
48 return 1; 55 return 1;
49 if (hpfs_compare_names(dentry->d_sb, a->name, al, b->name, bl, 0)) 56 if (hpfs_compare_names(parent->d_sb, str, al, name->name, bl, 0))
50 return 1; 57 return 1;
51 return 0; 58 return 0;
52} 59}
@@ -58,5 +65,5 @@ static const struct dentry_operations hpfs_dentry_operations = {
58 65
59void hpfs_set_dentry_operations(struct dentry *dentry) 66void hpfs_set_dentry_operations(struct dentry *dentry)
60{ 67{
61 dentry->d_op = &hpfs_dentry_operations; 68 d_set_d_op(dentry, &hpfs_dentry_operations);
62} 69}
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 11c2b4080f65..f4ad9e31ddc4 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -419,7 +419,7 @@ again:
419 unlock_kernel(); 419 unlock_kernel();
420 return -ENOSPC; 420 return -ENOSPC;
421 } 421 }
422 if (generic_permission(inode, MAY_WRITE, NULL) || 422 if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
423 !S_ISREG(inode->i_mode) || 423 !S_ISREG(inode->i_mode) ||
424 get_write_access(inode)) { 424 get_write_access(inode)) {
425 d_rehash(dentry); 425 d_rehash(dentry);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 6c5f01597c3a..49935ba78db8 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -177,11 +177,18 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
177 return &ei->vfs_inode; 177 return &ei->vfs_inode;
178} 178}
179 179
180static void hpfs_destroy_inode(struct inode *inode) 180static void hpfs_i_callback(struct rcu_head *head)
181{ 181{
182 struct inode *inode = container_of(head, struct inode, i_rcu);
183 INIT_LIST_HEAD(&inode->i_dentry);
182 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); 184 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
183} 185}
184 186
187static void hpfs_destroy_inode(struct inode *inode)
188{
189 call_rcu(&inode->i_rcu, hpfs_i_callback);
190}
191
185static void init_once(void *foo) 192static void init_once(void *foo)
186{ 193{
187 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 194 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f702b5f713fc..87ed48e0343d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -632,11 +632,18 @@ void hppfs_evict_inode(struct inode *ino)
632 mntput(ino->i_sb->s_fs_info); 632 mntput(ino->i_sb->s_fs_info);
633} 633}
634 634
635static void hppfs_destroy_inode(struct inode *inode) 635static void hppfs_i_callback(struct rcu_head *head)
636{ 636{
637 struct inode *inode = container_of(head, struct inode, i_rcu);
638 INIT_LIST_HEAD(&inode->i_dentry);
637 kfree(HPPFS_I(inode)); 639 kfree(HPPFS_I(inode));
638} 640}
639 641
642static void hppfs_destroy_inode(struct inode *inode)
643{
644 call_rcu(&inode->i_rcu, hppfs_i_callback);
645}
646
640static const struct super_operations hppfs_sbops = { 647static const struct super_operations hppfs_sbops = {
641 .alloc_inode = hppfs_alloc_inode, 648 .alloc_inode = hppfs_alloc_inode,
642 .destroy_inode = hppfs_destroy_inode, 649 .destroy_inode = hppfs_destroy_inode,
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d6cfac1f0a40..9885082b470f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -663,11 +663,18 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
663 return &p->vfs_inode; 663 return &p->vfs_inode;
664} 664}
665 665
666static void hugetlbfs_i_callback(struct rcu_head *head)
667{
668 struct inode *inode = container_of(head, struct inode, i_rcu);
669 INIT_LIST_HEAD(&inode->i_dentry);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
671}
672
666static void hugetlbfs_destroy_inode(struct inode *inode) 673static void hugetlbfs_destroy_inode(struct inode *inode)
667{ 674{
668 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 675 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
669 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 676 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 677 call_rcu(&inode->i_rcu, hugetlbfs_i_callback);
671} 678}
672 679
673static const struct address_space_operations hugetlbfs_aops = { 680static const struct address_space_operations hugetlbfs_aops = {
@@ -932,8 +939,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
932 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { 939 if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
933 *user = current_user(); 940 *user = current_user();
934 if (user_shm_lock(size, *user)) { 941 if (user_shm_lock(size, *user)) {
935 WARN_ONCE(1, 942 printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
936 "Using mlock ulimits for SHM_HUGETLB deprecated\n");
937 } else { 943 } else {
938 *user = NULL; 944 *user = NULL;
939 return ERR_PTR(-EPERM); 945 return ERR_PTR(-EPERM);
diff --git a/fs/inode.c b/fs/inode.c
index ae2727ab0c3a..da85e56378f3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -102,26 +102,29 @@ static DECLARE_RWSEM(iprune_sem);
102 */ 102 */
103struct inodes_stat_t inodes_stat; 103struct inodes_stat_t inodes_stat;
104 104
105static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; 105static DEFINE_PER_CPU(unsigned int, nr_inodes);
106static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
107 106
108static struct kmem_cache *inode_cachep __read_mostly; 107static struct kmem_cache *inode_cachep __read_mostly;
109 108
110static inline int get_nr_inodes(void) 109static int get_nr_inodes(void)
111{ 110{
112 return percpu_counter_sum_positive(&nr_inodes); 111 int i;
112 int sum = 0;
113 for_each_possible_cpu(i)
114 sum += per_cpu(nr_inodes, i);
115 return sum < 0 ? 0 : sum;
113} 116}
114 117
115static inline int get_nr_inodes_unused(void) 118static inline int get_nr_inodes_unused(void)
116{ 119{
117 return percpu_counter_sum_positive(&nr_inodes_unused); 120 return inodes_stat.nr_unused;
118} 121}
119 122
120int get_nr_dirty_inodes(void) 123int get_nr_dirty_inodes(void)
121{ 124{
125 /* not actually dirty inodes, but a wild approximation */
122 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); 126 int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
123 return nr_dirty > 0 ? nr_dirty : 0; 127 return nr_dirty > 0 ? nr_dirty : 0;
124
125} 128}
126 129
127/* 130/*
@@ -132,7 +135,6 @@ int proc_nr_inodes(ctl_table *table, int write,
132 void __user *buffer, size_t *lenp, loff_t *ppos) 135 void __user *buffer, size_t *lenp, loff_t *ppos)
133{ 136{
134 inodes_stat.nr_inodes = get_nr_inodes(); 137 inodes_stat.nr_inodes = get_nr_inodes();
135 inodes_stat.nr_unused = get_nr_inodes_unused();
136 return proc_dointvec(table, write, buffer, lenp, ppos); 138 return proc_dointvec(table, write, buffer, lenp, ppos);
137} 139}
138#endif 140#endif
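The hunk above replaces the percpu_counter for nr_inodes with a plain per-CPU variable that each CPU increments and decrements locally; readers sum all slots without synchronisation, so the total is only approximate and is clamped at zero. A standalone userspace model of that summation (NR_CPUS and the sample values are assumptions for illustration):

#include <stdio.h>

#define NR_CPUS 4

/* Each "CPU" only touches its own slot; a slot can go negative when a CPU
 * frees inodes it did not allocate, and an unsynchronised sum taken while
 * updates are in flight is only approximate, hence the clamp to zero. */
static int nr_inodes[NR_CPUS];

static int get_nr_inodes(void)
{
        int i, sum = 0;

        for (i = 0; i < NR_CPUS; i++)
                sum += nr_inodes[i];
        return sum < 0 ? 0 : sum;
}

int main(void)
{
        nr_inodes[0] += 2;   /* two inodes accounted on CPU 0 */
        nr_inodes[3] -= 3;   /* three freed on CPU 3, allocations counted elsewhere */
        printf("approximate nr_inodes = %d\n", get_nr_inodes());
        return 0;
}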
@@ -224,7 +226,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
224 inode->i_fsnotify_mask = 0; 226 inode->i_fsnotify_mask = 0;
225#endif 227#endif
226 228
227 percpu_counter_inc(&nr_inodes); 229 this_cpu_inc(nr_inodes);
228 230
229 return 0; 231 return 0;
230out: 232out:
@@ -255,6 +257,12 @@ static struct inode *alloc_inode(struct super_block *sb)
255 return inode; 257 return inode;
256} 258}
257 259
260void free_inode_nonrcu(struct inode *inode)
261{
262 kmem_cache_free(inode_cachep, inode);
263}
264EXPORT_SYMBOL(free_inode_nonrcu);
265
258void __destroy_inode(struct inode *inode) 266void __destroy_inode(struct inode *inode)
259{ 267{
260 BUG_ON(inode_has_buffers(inode)); 268 BUG_ON(inode_has_buffers(inode));
@@ -266,10 +274,17 @@ void __destroy_inode(struct inode *inode)
266 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) 274 if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
267 posix_acl_release(inode->i_default_acl); 275 posix_acl_release(inode->i_default_acl);
268#endif 276#endif
269 percpu_counter_dec(&nr_inodes); 277 this_cpu_dec(nr_inodes);
270} 278}
271EXPORT_SYMBOL(__destroy_inode); 279EXPORT_SYMBOL(__destroy_inode);
272 280
281static void i_callback(struct rcu_head *head)
282{
283 struct inode *inode = container_of(head, struct inode, i_rcu);
284 INIT_LIST_HEAD(&inode->i_dentry);
285 kmem_cache_free(inode_cachep, inode);
286}
287
273static void destroy_inode(struct inode *inode) 288static void destroy_inode(struct inode *inode)
274{ 289{
275 BUG_ON(!list_empty(&inode->i_lru)); 290 BUG_ON(!list_empty(&inode->i_lru));
@@ -277,7 +292,7 @@ static void destroy_inode(struct inode *inode)
277 if (inode->i_sb->s_op->destroy_inode) 292 if (inode->i_sb->s_op->destroy_inode)
278 inode->i_sb->s_op->destroy_inode(inode); 293 inode->i_sb->s_op->destroy_inode(inode);
279 else 294 else
280 kmem_cache_free(inode_cachep, (inode)); 295 call_rcu(&inode->i_rcu, i_callback);
281} 296}
282 297
283/* 298/*
@@ -335,7 +350,7 @@ static void inode_lru_list_add(struct inode *inode)
335{ 350{
336 if (list_empty(&inode->i_lru)) { 351 if (list_empty(&inode->i_lru)) {
337 list_add(&inode->i_lru, &inode_lru); 352 list_add(&inode->i_lru, &inode_lru);
338 percpu_counter_inc(&nr_inodes_unused); 353 inodes_stat.nr_unused++;
339 } 354 }
340} 355}
341 356
@@ -343,7 +358,7 @@ static void inode_lru_list_del(struct inode *inode)
343{ 358{
344 if (!list_empty(&inode->i_lru)) { 359 if (!list_empty(&inode->i_lru)) {
345 list_del_init(&inode->i_lru); 360 list_del_init(&inode->i_lru);
346 percpu_counter_dec(&nr_inodes_unused); 361 inodes_stat.nr_unused--;
347 } 362 }
348} 363}
349 364
@@ -430,6 +445,7 @@ void end_writeback(struct inode *inode)
430 BUG_ON(!(inode->i_state & I_FREEING)); 445 BUG_ON(!(inode->i_state & I_FREEING));
431 BUG_ON(inode->i_state & I_CLEAR); 446 BUG_ON(inode->i_state & I_CLEAR);
432 inode_sync_wait(inode); 447 inode_sync_wait(inode);
448 /* don't need i_lock here, no concurrent mods to i_state */
433 inode->i_state = I_FREEING | I_CLEAR; 449 inode->i_state = I_FREEING | I_CLEAR;
434} 450}
435EXPORT_SYMBOL(end_writeback); 451EXPORT_SYMBOL(end_writeback);
@@ -513,7 +529,7 @@ void evict_inodes(struct super_block *sb)
513 list_move(&inode->i_lru, &dispose); 529 list_move(&inode->i_lru, &dispose);
514 list_del_init(&inode->i_wb_list); 530 list_del_init(&inode->i_wb_list);
515 if (!(inode->i_state & (I_DIRTY | I_SYNC))) 531 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
516 percpu_counter_dec(&nr_inodes_unused); 532 inodes_stat.nr_unused--;
517 } 533 }
518 spin_unlock(&inode_lock); 534 spin_unlock(&inode_lock);
519 535
@@ -554,7 +570,7 @@ int invalidate_inodes(struct super_block *sb)
554 list_move(&inode->i_lru, &dispose); 570 list_move(&inode->i_lru, &dispose);
555 list_del_init(&inode->i_wb_list); 571 list_del_init(&inode->i_wb_list);
556 if (!(inode->i_state & (I_DIRTY | I_SYNC))) 572 if (!(inode->i_state & (I_DIRTY | I_SYNC)))
557 percpu_counter_dec(&nr_inodes_unused); 573 inodes_stat.nr_unused--;
558 } 574 }
559 spin_unlock(&inode_lock); 575 spin_unlock(&inode_lock);
560 576
@@ -616,7 +632,7 @@ static void prune_icache(int nr_to_scan)
616 if (atomic_read(&inode->i_count) || 632 if (atomic_read(&inode->i_count) ||
617 (inode->i_state & ~I_REFERENCED)) { 633 (inode->i_state & ~I_REFERENCED)) {
618 list_del_init(&inode->i_lru); 634 list_del_init(&inode->i_lru);
619 percpu_counter_dec(&nr_inodes_unused); 635 inodes_stat.nr_unused--;
620 continue; 636 continue;
621 } 637 }
622 638
@@ -650,7 +666,7 @@ static void prune_icache(int nr_to_scan)
650 */ 666 */
651 list_move(&inode->i_lru, &freeable); 667 list_move(&inode->i_lru, &freeable);
652 list_del_init(&inode->i_wb_list); 668 list_del_init(&inode->i_wb_list);
653 percpu_counter_dec(&nr_inodes_unused); 669 inodes_stat.nr_unused--;
654 } 670 }
655 if (current_is_kswapd()) 671 if (current_is_kswapd())
656 __count_vm_events(KSWAPD_INODESTEAL, reap); 672 __count_vm_events(KSWAPD_INODESTEAL, reap);
@@ -1648,8 +1664,6 @@ void __init inode_init(void)
1648 SLAB_MEM_SPREAD), 1664 SLAB_MEM_SPREAD),
1649 init_once); 1665 init_once);
1650 register_shrinker(&icache_shrinker); 1666 register_shrinker(&icache_shrinker);
1651 percpu_counter_init(&nr_inodes, 0);
1652 percpu_counter_init(&nr_inodes_unused, 0);
1653 1667
1654 /* Hash may have been set up in inode_init_early */ 1668 /* Hash may have been set up in inode_init_early */
1655 if (!hashdist) 1669 if (!hashdist)
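
The fs/inode.c changes above combine two patterns that recur throughout this series: the global percpu_counter for nr_inodes becomes a plain per-CPU variable that is only summed on the slow path, and the final kmem_cache_free() of an inode is deferred to an RCU callback so that lock-free (rcu-walk) path lookup can still dereference inodes it finds in the dcache. A minimal sketch of both, using invented names (my_counter, my_inode_cachep) rather than anything from the patch:

#include <linux/fs.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static DEFINE_PER_CPU(unsigned int, my_counter);
static struct kmem_cache *my_inode_cachep;	/* assumed to be set up elsewhere */

/* fast path: no shared cacheline, no lock */
static inline void my_counter_inc(void) { this_cpu_inc(my_counter); }
static inline void my_counter_dec(void) { this_cpu_dec(my_counter); }

/* slow path: sum every CPU's contribution; clamp, it can transiently go negative */
static int my_counter_read(void)
{
	int cpu, sum = 0;

	for_each_possible_cpu(cpu)
		sum += per_cpu(my_counter, cpu);
	return sum < 0 ? 0 : sum;
}

/* free the inode only after a grace period, so rcu-walk readers never see freed memory */
static void my_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	kmem_cache_free(my_inode_cachep, inode);
}

static void my_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, my_i_callback);
}

The per-filesystem hunks further down (isofs, jffs2, jfs, logfs, minix) are the same destroy_inode conversion applied to each filesystem's private inode cache.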
diff --git a/fs/internal.h b/fs/internal.h
index e43b9a4dbf4e..9687c2ee2735 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -63,6 +63,7 @@ extern int copy_mount_string(const void __user *, char **);
63 63
64extern void free_vfsmnt(struct vfsmount *); 64extern void free_vfsmnt(struct vfsmount *);
65extern struct vfsmount *alloc_vfsmnt(const char *); 65extern struct vfsmount *alloc_vfsmnt(const char *);
66extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 67extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
68 struct vfsmount *); 69 struct vfsmount *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index e92fdbb3bc3a..d6cc16476620 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -6,7 +6,6 @@
6 6
7#include <linux/syscalls.h> 7#include <linux/syscalls.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/smp_lock.h>
10#include <linux/capability.h> 9#include <linux/capability.h>
11#include <linux/file.h> 10#include <linux/file.h>
12#include <linux/fs.h> 11#include <linux/fs.h>
@@ -530,41 +529,6 @@ static int ioctl_fsthaw(struct file *filp)
530 return thaw_super(sb); 529 return thaw_super(sb);
531} 530}
532 531
533static int ioctl_fstrim(struct file *filp, void __user *argp)
534{
535 struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
536 struct fstrim_range range;
537 int ret = 0;
538
539 if (!capable(CAP_SYS_ADMIN))
540 return -EPERM;
541
542 /* If filesystem doesn't support trim feature, return. */
543 if (sb->s_op->trim_fs == NULL)
544 return -EOPNOTSUPP;
545
546 /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
547 if (sb->s_bdev == NULL)
548 return -EINVAL;
549
550 if (argp == NULL) {
551 range.start = 0;
552 range.len = ULLONG_MAX;
553 range.minlen = 0;
554 } else if (copy_from_user(&range, argp, sizeof(range)))
555 return -EFAULT;
556
557 ret = sb->s_op->trim_fs(sb, &range);
558 if (ret < 0)
559 return ret;
560
561 if ((argp != NULL) &&
562 (copy_to_user(argp, &range, sizeof(range))))
563 return -EFAULT;
564
565 return 0;
566}
567
568/* 532/*
569 * When you add any new common ioctls to the switches above and below 533 * When you add any new common ioctls to the switches above and below
570 * please update compat_sys_ioctl() too. 534 * please update compat_sys_ioctl() too.
@@ -615,10 +579,6 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
615 error = ioctl_fsthaw(filp); 579 error = ioctl_fsthaw(filp);
616 break; 580 break;
617 581
618 case FITRIM:
619 error = ioctl_fstrim(filp, argp);
620 break;
621
622 case FS_IOC_FIEMAP: 582 case FS_IOC_FIEMAP:
623 return ioctl_fiemap(filp, arg); 583 return ioctl_fiemap(filp, arg);
624 584
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb92dcc6..7da2a06508e5 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -103,12 +103,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
103 } 103 }
104 104
105 ret = -ESRCH; 105 ret = -ESRCH;
106 /* 106 rcu_read_lock();
107 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
108 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
109 * in copy_process().
110 */
111 read_lock(&tasklist_lock);
112 switch (which) { 107 switch (which) {
113 case IOPRIO_WHO_PROCESS: 108 case IOPRIO_WHO_PROCESS:
114 if (!who) 109 if (!who)
@@ -153,7 +148,7 @@ free_uid:
153 ret = -EINVAL; 148 ret = -EINVAL;
154 } 149 }
155 150
156 read_unlock(&tasklist_lock); 151 rcu_read_unlock();
157 return ret; 152 return ret;
158} 153}
159 154
@@ -197,7 +192,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
197 int ret = -ESRCH; 192 int ret = -ESRCH;
198 int tmpio; 193 int tmpio;
199 194
200 read_lock(&tasklist_lock); 195 rcu_read_lock();
201 switch (which) { 196 switch (which) {
202 case IOPRIO_WHO_PROCESS: 197 case IOPRIO_WHO_PROCESS:
203 if (!who) 198 if (!who)
@@ -250,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
250 ret = -EINVAL; 245 ret = -EINVAL;
251 } 246 }
252 247
253 read_unlock(&tasklist_lock); 248 rcu_read_unlock();
254 return ret; 249 return ret;
255} 250}
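
The ioprio hunks drop the tasklist_lock read lock in favour of an RCU read-side critical section; looking a task up by PID and reading a field that tolerates slight staleness needs no stronger protection. A sketch of that pattern with a made-up helper (read_task_nice) rather than the syscall bodies themselves:

#include <linux/errno.h>
#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

/* Look up a task by PID and read one field without taking tasklist_lock. */
static int read_task_nice(pid_t pid, int *nice)
{
	struct task_struct *p;
	int ret = -ESRCH;

	rcu_read_lock();
	p = pid ? find_task_by_vpid(pid) : current;
	if (p) {
		*nice = task_nice(p);
		ret = 0;
	}
	rcu_read_unlock();	/* p must not be used past this point */
	return ret;
}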
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bfdeb82a53be..844a7903c72f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -26,16 +26,32 @@
26 26
27#define BEQUIET 27#define BEQUIET
28 28
29static int isofs_hashi(struct dentry *parent, struct qstr *qstr); 29static int isofs_hashi(const struct dentry *parent, const struct inode *inode,
30static int isofs_hash(struct dentry *parent, struct qstr *qstr); 30 struct qstr *qstr);
31static int isofs_dentry_cmpi(struct dentry *dentry, struct qstr *a, struct qstr *b); 31static int isofs_hash(const struct dentry *parent, const struct inode *inode,
32static int isofs_dentry_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b); 32 struct qstr *qstr);
33static int isofs_dentry_cmpi(const struct dentry *parent,
34 const struct inode *pinode,
35 const struct dentry *dentry, const struct inode *inode,
36 unsigned int len, const char *str, const struct qstr *name);
37static int isofs_dentry_cmp(const struct dentry *parent,
38 const struct inode *pinode,
39 const struct dentry *dentry, const struct inode *inode,
40 unsigned int len, const char *str, const struct qstr *name);
33 41
34#ifdef CONFIG_JOLIET 42#ifdef CONFIG_JOLIET
35static int isofs_hashi_ms(struct dentry *parent, struct qstr *qstr); 43static int isofs_hashi_ms(const struct dentry *parent, const struct inode *inode,
36static int isofs_hash_ms(struct dentry *parent, struct qstr *qstr); 44 struct qstr *qstr);
37static int isofs_dentry_cmpi_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); 45static int isofs_hash_ms(const struct dentry *parent, const struct inode *inode,
38static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qstr *b); 46 struct qstr *qstr);
47static int isofs_dentry_cmpi_ms(const struct dentry *parent,
48 const struct inode *pinode,
49 const struct dentry *dentry, const struct inode *inode,
50 unsigned int len, const char *str, const struct qstr *name);
51static int isofs_dentry_cmp_ms(const struct dentry *parent,
52 const struct inode *pinode,
53 const struct dentry *dentry, const struct inode *inode,
54 unsigned int len, const char *str, const struct qstr *name);
39#endif 55#endif
40 56
41static void isofs_put_super(struct super_block *sb) 57static void isofs_put_super(struct super_block *sb)
@@ -65,11 +81,18 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
65 return &ei->vfs_inode; 81 return &ei->vfs_inode;
66} 82}
67 83
68static void isofs_destroy_inode(struct inode *inode) 84static void isofs_i_callback(struct rcu_head *head)
69{ 85{
86 struct inode *inode = container_of(head, struct inode, i_rcu);
87 INIT_LIST_HEAD(&inode->i_dentry);
70 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 88 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
71} 89}
72 90
91static void isofs_destroy_inode(struct inode *inode)
92{
93 call_rcu(&inode->i_rcu, isofs_i_callback);
94}
95
73static void init_once(void *foo) 96static void init_once(void *foo)
74{ 97{
75 struct iso_inode_info *ei = foo; 98 struct iso_inode_info *ei = foo;
@@ -160,7 +183,7 @@ struct iso9660_options{
160 * Compute the hash for the isofs name corresponding to the dentry. 183 * Compute the hash for the isofs name corresponding to the dentry.
161 */ 184 */
162static int 185static int
163isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms) 186isofs_hash_common(const struct dentry *dentry, struct qstr *qstr, int ms)
164{ 187{
165 const char *name; 188 const char *name;
166 int len; 189 int len;
@@ -181,7 +204,7 @@ isofs_hash_common(struct dentry *dentry, struct qstr *qstr, int ms)
181 * Compute the hash for the isofs name corresponding to the dentry. 204 * Compute the hash for the isofs name corresponding to the dentry.
182 */ 205 */
183static int 206static int
184isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms) 207isofs_hashi_common(const struct dentry *dentry, struct qstr *qstr, int ms)
185{ 208{
186 const char *name; 209 const char *name;
187 int len; 210 int len;
@@ -206,100 +229,94 @@ isofs_hashi_common(struct dentry *dentry, struct qstr *qstr, int ms)
206} 229}
207 230
208/* 231/*
209 * Case insensitive compare of two isofs names. 232 * Comparison of two isofs names.
210 */
211static int isofs_dentry_cmpi_common(struct dentry *dentry, struct qstr *a,
212 struct qstr *b, int ms)
213{
214 int alen, blen;
215
216 /* A filename cannot end in '.' or we treat it like it has none */
217 alen = a->len;
218 blen = b->len;
219 if (ms) {
220 while (alen && a->name[alen-1] == '.')
221 alen--;
222 while (blen && b->name[blen-1] == '.')
223 blen--;
224 }
225 if (alen == blen) {
226 if (strnicmp(a->name, b->name, alen) == 0)
227 return 0;
228 }
229 return 1;
230}
231
232/*
233 * Case sensitive compare of two isofs names.
234 */ 233 */
235static int isofs_dentry_cmp_common(struct dentry *dentry, struct qstr *a, 234static int isofs_dentry_cmp_common(
236 struct qstr *b, int ms) 235 unsigned int len, const char *str,
236 const struct qstr *name, int ms, int ci)
237{ 237{
238 int alen, blen; 238 int alen, blen;
239 239
240 /* A filename cannot end in '.' or we treat it like it has none */ 240 /* A filename cannot end in '.' or we treat it like it has none */
241 alen = a->len; 241 alen = name->len;
242 blen = b->len; 242 blen = len;
243 if (ms) { 243 if (ms) {
244 while (alen && a->name[alen-1] == '.') 244 while (alen && name->name[alen-1] == '.')
245 alen--; 245 alen--;
246 while (blen && b->name[blen-1] == '.') 246 while (blen && str[blen-1] == '.')
247 blen--; 247 blen--;
248 } 248 }
249 if (alen == blen) { 249 if (alen == blen) {
250 if (strncmp(a->name, b->name, alen) == 0) 250 if (ci) {
251 return 0; 251 if (strnicmp(name->name, str, alen) == 0)
252 return 0;
253 } else {
254 if (strncmp(name->name, str, alen) == 0)
255 return 0;
256 }
252 } 257 }
253 return 1; 258 return 1;
254} 259}
255 260
256static int 261static int
257isofs_hash(struct dentry *dentry, struct qstr *qstr) 262isofs_hash(const struct dentry *dentry, const struct inode *inode,
263 struct qstr *qstr)
258{ 264{
259 return isofs_hash_common(dentry, qstr, 0); 265 return isofs_hash_common(dentry, qstr, 0);
260} 266}
261 267
262static int 268static int
263isofs_hashi(struct dentry *dentry, struct qstr *qstr) 269isofs_hashi(const struct dentry *dentry, const struct inode *inode,
270 struct qstr *qstr)
264{ 271{
265 return isofs_hashi_common(dentry, qstr, 0); 272 return isofs_hashi_common(dentry, qstr, 0);
266} 273}
267 274
268static int 275static int
269isofs_dentry_cmp(struct dentry *dentry,struct qstr *a,struct qstr *b) 276isofs_dentry_cmp(const struct dentry *parent, const struct inode *pinode,
277 const struct dentry *dentry, const struct inode *inode,
278 unsigned int len, const char *str, const struct qstr *name)
270{ 279{
271 return isofs_dentry_cmp_common(dentry, a, b, 0); 280 return isofs_dentry_cmp_common(len, str, name, 0, 0);
272} 281}
273 282
274static int 283static int
275isofs_dentry_cmpi(struct dentry *dentry,struct qstr *a,struct qstr *b) 284isofs_dentry_cmpi(const struct dentry *parent, const struct inode *pinode,
285 const struct dentry *dentry, const struct inode *inode,
286 unsigned int len, const char *str, const struct qstr *name)
276{ 287{
277 return isofs_dentry_cmpi_common(dentry, a, b, 0); 288 return isofs_dentry_cmp_common(len, str, name, 0, 1);
278} 289}
279 290
280#ifdef CONFIG_JOLIET 291#ifdef CONFIG_JOLIET
281static int 292static int
282isofs_hash_ms(struct dentry *dentry, struct qstr *qstr) 293isofs_hash_ms(const struct dentry *dentry, const struct inode *inode,
294 struct qstr *qstr)
283{ 295{
284 return isofs_hash_common(dentry, qstr, 1); 296 return isofs_hash_common(dentry, qstr, 1);
285} 297}
286 298
287static int 299static int
288isofs_hashi_ms(struct dentry *dentry, struct qstr *qstr) 300isofs_hashi_ms(const struct dentry *dentry, const struct inode *inode,
301 struct qstr *qstr)
289{ 302{
290 return isofs_hashi_common(dentry, qstr, 1); 303 return isofs_hashi_common(dentry, qstr, 1);
291} 304}
292 305
293static int 306static int
294isofs_dentry_cmp_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) 307isofs_dentry_cmp_ms(const struct dentry *parent, const struct inode *pinode,
308 const struct dentry *dentry, const struct inode *inode,
309 unsigned int len, const char *str, const struct qstr *name)
295{ 310{
296 return isofs_dentry_cmp_common(dentry, a, b, 1); 311 return isofs_dentry_cmp_common(len, str, name, 1, 0);
297} 312}
298 313
299static int 314static int
300isofs_dentry_cmpi_ms(struct dentry *dentry,struct qstr *a,struct qstr *b) 315isofs_dentry_cmpi_ms(const struct dentry *parent, const struct inode *pinode,
316 const struct dentry *dentry, const struct inode *inode,
317 unsigned int len, const char *str, const struct qstr *name)
301{ 318{
302 return isofs_dentry_cmpi_common(dentry, a, b, 1); 319 return isofs_dentry_cmp_common(len, str, name, 1, 1);
303} 320}
304#endif 321#endif
305 322
@@ -932,7 +949,7 @@ root_found:
932 table += 2; 949 table += 2;
933 if (opt.check == 'r') 950 if (opt.check == 'r')
934 table++; 951 table++;
935 s->s_root->d_op = &isofs_dentry_ops[table]; 952 d_set_d_op(s->s_root, &isofs_dentry_ops[table]);
936 953
937 kfree(opt.iocharset); 954 kfree(opt.iocharset);
938 955
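
The isofs conversion shows the new dentry-operation prototypes this series introduces: ->d_hash and ->d_compare receive const dentry/inode pointers, and the name being compared arrives as an explicit (len, str) pair so the routines can run under rcu-walk without touching the dentry itself. A hypothetical case-insensitive filesystem would wire up the same shape like this (the ci_* names are illustrative only):

#include <linux/ctype.h>
#include <linux/dcache.h>
#include <linux/string.h>

static int ci_hash(const struct dentry *dentry, const struct inode *inode,
		   struct qstr *q)
{
	unsigned long hash = init_name_hash();
	unsigned int i;

	for (i = 0; i < q->len; i++)
		hash = partial_name_hash(tolower(q->name[i]), hash);
	q->hash = end_name_hash(hash);
	return 0;
}

static int ci_compare(const struct dentry *parent, const struct inode *pinode,
		      const struct dentry *dentry, const struct inode *inode,
		      unsigned int len, const char *str, const struct qstr *name)
{
	/* (len, str) is the dentry's stored name; "name" is the one being looked up */
	if (len != name->len)
		return 1;
	return strnicmp(str, name->name, len) ? 1 : 0;
}

static const struct dentry_operations ci_dentry_ops = {
	.d_hash		= ci_hash,
	.d_compare	= ci_compare,
};

A filesystem installs these per dentry with d_set_d_op(), as the ->lookup hunk in fs/isofs/namei.c below does.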
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 0d23abfd4280..679a849c3b27 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -37,7 +37,8 @@ isofs_cmp(struct dentry *dentry, const char *compare, int dlen)
37 37
38 qstr.name = compare; 38 qstr.name = compare;
39 qstr.len = dlen; 39 qstr.len = dlen;
40 return dentry->d_op->d_compare(dentry, &dentry->d_name, &qstr); 40 return dentry->d_op->d_compare(NULL, NULL, NULL, NULL,
41 dentry->d_name.len, dentry->d_name.name, &qstr);
41} 42}
42 43
43/* 44/*
@@ -171,7 +172,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
171 struct inode *inode; 172 struct inode *inode;
172 struct page *page; 173 struct page *page;
173 174
174 dentry->d_op = dir->i_sb->s_root->d_op; 175 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
175 176
176 page = alloc_page(GFP_USER); 177 page = alloc_page(GFP_USER);
177 if (!page) 178 if (!page)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c590d155c095..f837ba953529 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -899,6 +899,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
899 899
900 /* journal descriptor can store up to n blocks -bzzz */ 900 /* journal descriptor can store up to n blocks -bzzz */
901 journal->j_blocksize = blocksize; 901 journal->j_blocksize = blocksize;
902 journal->j_dev = bdev;
903 journal->j_fs_dev = fs_dev;
904 journal->j_blk_offset = start;
905 journal->j_maxlen = len;
906 bdevname(journal->j_dev, journal->j_devname);
907 p = journal->j_devname;
908 while ((p = strchr(p, '/')))
909 *p = '!';
902 jbd2_stats_proc_init(journal); 910 jbd2_stats_proc_init(journal);
903 n = journal->j_blocksize / sizeof(journal_block_tag_t); 911 n = journal->j_blocksize / sizeof(journal_block_tag_t);
904 journal->j_wbufsize = n; 912 journal->j_wbufsize = n;
@@ -908,14 +916,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
908 __func__); 916 __func__);
909 goto out_err; 917 goto out_err;
910 } 918 }
911 journal->j_dev = bdev;
912 journal->j_fs_dev = fs_dev;
913 journal->j_blk_offset = start;
914 journal->j_maxlen = len;
915 bdevname(journal->j_dev, journal->j_devname);
916 p = journal->j_devname;
917 while ((p = strchr(p, '/')))
918 *p = '!';
919 919
920 bh = __getblk(journal->j_dev, start, journal->j_blocksize); 920 bh = __getblk(journal->j_dev, start, journal->j_blocksize);
921 if (!bh) { 921 if (!bh) {
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 54a92fd02bbd..95b79672150a 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,11 +259,14 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
259 return rc; 259 return rc;
260} 260}
261 261
262int jffs2_check_acl(struct inode *inode, int mask) 262int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags)
263{ 263{
264 struct posix_acl *acl; 264 struct posix_acl *acl;
265 int rc; 265 int rc;
266 266
267 if (flags & IPERM_FLAG_RCU)
268 return -ECHILD;
269
267 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); 270 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
268 if (IS_ERR(acl)) 271 if (IS_ERR(acl))
269 return PTR_ERR(acl); 272 return PTR_ERR(acl);
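
Every ->check_acl conversion in this series has the same shape: the hook gains a flags argument and bails out with -ECHILD when invoked in rcu-walk context, because fetching an ACL may block; the VFS then retries in ref-walk mode. Sketched generically (myfs_get_acl is a stand-in for the filesystem's own ACL fetch, as in jffs2 above):

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/posix_acl.h>

struct posix_acl *myfs_get_acl(struct inode *inode, int type);	/* fs-specific, may block */

static int myfs_check_acl(struct inode *inode, int mask, unsigned int flags)
{
	struct posix_acl *acl;
	int rc;

	if (flags & IPERM_FLAG_RCU)
		return -ECHILD;		/* cannot sleep under rcu-walk */

	acl = myfs_get_acl(inode, ACL_TYPE_ACCESS);
	if (IS_ERR(acl))
		return PTR_ERR(acl);
	if (acl) {
		rc = posix_acl_permission(inode, acl, mask);
		posix_acl_release(acl);
		return rc;
	}
	return -EAGAIN;			/* no ACL: fall back to the mode bits */
}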
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 5e42de8d9541..3119f59253d3 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_check_acl(struct inode *, int); 29extern int jffs2_check_acl(struct inode *, int, unsigned int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index c86041b866a4..853b8e300084 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -40,11 +40,18 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
40 return &f->vfs_inode; 40 return &f->vfs_inode;
41} 41}
42 42
43static void jffs2_destroy_inode(struct inode *inode) 43static void jffs2_i_callback(struct rcu_head *head)
44{ 44{
45 struct inode *inode = container_of(head, struct inode, i_rcu);
46 INIT_LIST_HEAD(&inode->i_dentry);
45 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); 47 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
46} 48}
47 49
50static void jffs2_destroy_inode(struct inode *inode)
51{
52 call_rcu(&inode->i_rcu, jffs2_i_callback);
53}
54
48static void jffs2_i_init_once(void *foo) 55static void jffs2_i_init_once(void *foo)
49{ 56{
50 struct jffs2_inode_info *f = foo; 57 struct jffs2_inode_info *f = foo;
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 1057a4998e4e..e5de9422fa32 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,10 +114,14 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117int jfs_check_acl(struct inode *inode, int mask) 117int jfs_check_acl(struct inode *inode, int mask, unsigned int flags)
118{ 118{
119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 119 struct posix_acl *acl;
120
121 if (flags & IPERM_FLAG_RCU)
122 return -ECHILD;
120 123
124 acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
121 if (IS_ERR(acl)) 125 if (IS_ERR(acl))
122 return PTR_ERR(acl); 126 return PTR_ERR(acl);
123 if (acl) { 127 if (acl) {
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 54e07559878d..f9285c4900fa 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_check_acl(struct inode *, int); 23int jfs_check_acl(struct inode *, int, unsigned int flags);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_acl_chmod(struct inode *inode); 25int jfs_acl_chmod(struct inode *inode);
26 26
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 231ca4af9bce..4414e3a42264 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -18,6 +18,7 @@
18 */ 18 */
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/namei.h>
21#include <linux/ctype.h> 22#include <linux/ctype.h>
22#include <linux/quotaops.h> 23#include <linux/quotaops.h>
23#include <linux/exportfs.h> 24#include <linux/exportfs.h>
@@ -1465,7 +1466,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1465 jfs_info("jfs_lookup: name = %s", name); 1466 jfs_info("jfs_lookup: name = %s", name);
1466 1467
1467 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2) 1468 if (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)
1468 dentry->d_op = &jfs_ci_dentry_operations; 1469 d_set_d_op(dentry, &jfs_ci_dentry_operations);
1469 1470
1470 if ((name[0] == '.') && (len == 1)) 1471 if ((name[0] == '.') && (len == 1))
1471 inum = dip->i_ino; 1472 inum = dip->i_ino;
@@ -1494,7 +1495,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1494 dentry = d_splice_alias(ip, dentry); 1495 dentry = d_splice_alias(ip, dentry);
1495 1496
1496 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2)) 1497 if (dentry && (JFS_SBI(dip->i_sb)->mntflag & JFS_OS2))
1497 dentry->d_op = &jfs_ci_dentry_operations; 1498 d_set_d_op(dentry, &jfs_ci_dentry_operations);
1498 1499
1499 return dentry; 1500 return dentry;
1500} 1501}
@@ -1573,7 +1574,8 @@ const struct file_operations jfs_dir_operations = {
1573 .llseek = generic_file_llseek, 1574 .llseek = generic_file_llseek,
1574}; 1575};
1575 1576
1576static int jfs_ci_hash(struct dentry *dir, struct qstr *this) 1577static int jfs_ci_hash(const struct dentry *dir, const struct inode *inode,
1578 struct qstr *this)
1577{ 1579{
1578 unsigned long hash; 1580 unsigned long hash;
1579 int i; 1581 int i;
@@ -1586,32 +1588,63 @@ static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
1586 return 0; 1588 return 0;
1587} 1589}
1588 1590
1589static int jfs_ci_compare(struct dentry *dir, struct qstr *a, struct qstr *b) 1591static int jfs_ci_compare(const struct dentry *parent,
1592 const struct inode *pinode,
1593 const struct dentry *dentry, const struct inode *inode,
1594 unsigned int len, const char *str, const struct qstr *name)
1590{ 1595{
1591 int i, result = 1; 1596 int i, result = 1;
1592 1597
1593 if (a->len != b->len) 1598 if (len != name->len)
1594 goto out; 1599 goto out;
1595 for (i=0; i < a->len; i++) { 1600 for (i=0; i < len; i++) {
1596 if (tolower(a->name[i]) != tolower(b->name[i])) 1601 if (tolower(str[i]) != tolower(name->name[i]))
1597 goto out; 1602 goto out;
1598 } 1603 }
1599 result = 0; 1604 result = 0;
1605out:
1606 return result;
1607}
1600 1608
1609static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1610{
1611 if (nd->flags & LOOKUP_RCU)
1612 return -ECHILD;
1601 /* 1613 /*
1602 * We want creates to preserve case. A negative dentry, a, that 1614 * This is not negative dentry. Always valid.
1603 * has a different case than b may cause a new entry to be created 1615 *
1604 * with the wrong case. Since we can't tell if a comes from a negative 1616 * Note, rename() to existing directory entry will have ->d_inode,
1605 * dentry, we blindly replace it with b. This should be harmless if 1617 * and will use existing name which isn't specified name by user.
1606 * a is not a negative dentry. 1618 *
1619 * We may be able to drop this positive dentry here. But dropping
1620 * positive dentry isn't good idea. So it's unsupported like
1621 * rename("filename", "FILENAME") for now.
1607 */ 1622 */
1608 memcpy((unsigned char *)a->name, b->name, a->len); 1623 if (dentry->d_inode)
1609out: 1624 return 1;
1610 return result; 1625
1626 /*
1627 * This may be nfsd (or something), anyway, we can't see the
1628 * intent of this. So, since this can be for creation, drop it.
1629 */
1630 if (!nd)
1631 return 0;
1632
1633 /*
1634 * Drop the negative dentry, in order to make sure to use the
1635 * case sensitive name which is specified by user if this is
1636 * for creation.
1637 */
1638 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) {
1639 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1640 return 0;
1641 }
1642 return 1;
1611} 1643}
1612 1644
1613const struct dentry_operations jfs_ci_dentry_operations = 1645const struct dentry_operations jfs_ci_dentry_operations =
1614{ 1646{
1615 .d_hash = jfs_ci_hash, 1647 .d_hash = jfs_ci_hash,
1616 .d_compare = jfs_ci_compare, 1648 .d_compare = jfs_ci_compare,
1649 .d_revalidate = jfs_ci_revalidate,
1617}; 1650};
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0669fc1cc3bf..3150d766e0d4 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -115,6 +115,14 @@ static struct inode *jfs_alloc_inode(struct super_block *sb)
115 return &jfs_inode->vfs_inode; 115 return &jfs_inode->vfs_inode;
116} 116}
117 117
118static void jfs_i_callback(struct rcu_head *head)
119{
120 struct inode *inode = container_of(head, struct inode, i_rcu);
121 struct jfs_inode_info *ji = JFS_IP(inode);
122 INIT_LIST_HEAD(&inode->i_dentry);
123 kmem_cache_free(jfs_inode_cachep, ji);
124}
125
118static void jfs_destroy_inode(struct inode *inode) 126static void jfs_destroy_inode(struct inode *inode)
119{ 127{
120 struct jfs_inode_info *ji = JFS_IP(inode); 128 struct jfs_inode_info *ji = JFS_IP(inode);
@@ -128,7 +136,7 @@ static void jfs_destroy_inode(struct inode *inode)
128 ji->active_ag = -1; 136 ji->active_ag = -1;
129 } 137 }
130 spin_unlock_irq(&ji->ag_lock); 138 spin_unlock_irq(&ji->ag_lock);
131 kmem_cache_free(jfs_inode_cachep, ji); 139 call_rcu(&inode->i_rcu, jfs_i_callback);
132} 140}
133 141
134static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf) 142static int jfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -517,7 +525,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
517 goto out_no_root; 525 goto out_no_root;
518 526
519 if (sbi->mntflag & JFS_OS2) 527 if (sbi->mntflag & JFS_OS2)
520 sb->s_root->d_op = &jfs_ci_dentry_operations; 528 d_set_d_op(sb->s_root, &jfs_ci_dentry_operations);
521 529
522 /* logical blocks are represented by 40 bits in pxd_t, etc. */ 530 /* logical blocks are represented by 40 bits in pxd_t, etc. */
523 sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; 531 sb->s_maxbytes = ((u64) sb->s_blocksize) << 40;
diff --git a/fs/libfs.c b/fs/libfs.c
index a3accdf528ad..889311e3d06b 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,11 @@
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18 18
19static inline int simple_positive(struct dentry *dentry)
20{
21 return dentry->d_inode && !d_unhashed(dentry);
22}
23
19int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, 24int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
20 struct kstat *stat) 25 struct kstat *stat)
21{ 26{
@@ -37,7 +42,7 @@ int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
37 * Retaining negative dentries for an in-memory filesystem just wastes 42 * Retaining negative dentries for an in-memory filesystem just wastes
38 * memory and lookup time: arrange for them to be deleted immediately. 43 * memory and lookup time: arrange for them to be deleted immediately.
39 */ 44 */
40static int simple_delete_dentry(struct dentry *dentry) 45static int simple_delete_dentry(const struct dentry *dentry)
41{ 46{
42 return 1; 47 return 1;
43} 48}
@@ -54,7 +59,7 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct na
54 59
55 if (dentry->d_name.len > NAME_MAX) 60 if (dentry->d_name.len > NAME_MAX)
56 return ERR_PTR(-ENAMETOOLONG); 61 return ERR_PTR(-ENAMETOOLONG);
57 dentry->d_op = &simple_dentry_operations; 62 d_set_d_op(dentry, &simple_dentry_operations);
58 d_add(dentry, NULL); 63 d_add(dentry, NULL);
59 return NULL; 64 return NULL;
60} 65}
@@ -76,7 +81,8 @@ int dcache_dir_close(struct inode *inode, struct file *file)
76 81
77loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin) 82loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
78{ 83{
79 mutex_lock(&file->f_path.dentry->d_inode->i_mutex); 84 struct dentry *dentry = file->f_path.dentry;
85 mutex_lock(&dentry->d_inode->i_mutex);
80 switch (origin) { 86 switch (origin) {
81 case 1: 87 case 1:
82 offset += file->f_pos; 88 offset += file->f_pos;
@@ -84,7 +90,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
84 if (offset >= 0) 90 if (offset >= 0)
85 break; 91 break;
86 default: 92 default:
87 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 93 mutex_unlock(&dentry->d_inode->i_mutex);
88 return -EINVAL; 94 return -EINVAL;
89 } 95 }
90 if (offset != file->f_pos) { 96 if (offset != file->f_pos) {
@@ -94,21 +100,24 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
94 struct dentry *cursor = file->private_data; 100 struct dentry *cursor = file->private_data;
95 loff_t n = file->f_pos - 2; 101 loff_t n = file->f_pos - 2;
96 102
97 spin_lock(&dcache_lock); 103 spin_lock(&dentry->d_lock);
104 /* d_lock not required for cursor */
98 list_del(&cursor->d_u.d_child); 105 list_del(&cursor->d_u.d_child);
99 p = file->f_path.dentry->d_subdirs.next; 106 p = dentry->d_subdirs.next;
100 while (n && p != &file->f_path.dentry->d_subdirs) { 107 while (n && p != &dentry->d_subdirs) {
101 struct dentry *next; 108 struct dentry *next;
102 next = list_entry(p, struct dentry, d_u.d_child); 109 next = list_entry(p, struct dentry, d_u.d_child);
103 if (!d_unhashed(next) && next->d_inode) 110 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
111 if (simple_positive(next))
104 n--; 112 n--;
113 spin_unlock(&next->d_lock);
105 p = p->next; 114 p = p->next;
106 } 115 }
107 list_add_tail(&cursor->d_u.d_child, p); 116 list_add_tail(&cursor->d_u.d_child, p);
108 spin_unlock(&dcache_lock); 117 spin_unlock(&dentry->d_lock);
109 } 118 }
110 } 119 }
111 mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); 120 mutex_unlock(&dentry->d_inode->i_mutex);
112 return offset; 121 return offset;
113} 122}
114 123
@@ -148,29 +157,35 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
148 i++; 157 i++;
149 /* fallthrough */ 158 /* fallthrough */
150 default: 159 default:
151 spin_lock(&dcache_lock); 160 spin_lock(&dentry->d_lock);
152 if (filp->f_pos == 2) 161 if (filp->f_pos == 2)
153 list_move(q, &dentry->d_subdirs); 162 list_move(q, &dentry->d_subdirs);
154 163
155 for (p=q->next; p != &dentry->d_subdirs; p=p->next) { 164 for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
156 struct dentry *next; 165 struct dentry *next;
157 next = list_entry(p, struct dentry, d_u.d_child); 166 next = list_entry(p, struct dentry, d_u.d_child);
158 if (d_unhashed(next) || !next->d_inode) 167 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
168 if (!simple_positive(next)) {
169 spin_unlock(&next->d_lock);
159 continue; 170 continue;
171 }
160 172
161 spin_unlock(&dcache_lock); 173 spin_unlock(&next->d_lock);
174 spin_unlock(&dentry->d_lock);
162 if (filldir(dirent, next->d_name.name, 175 if (filldir(dirent, next->d_name.name,
163 next->d_name.len, filp->f_pos, 176 next->d_name.len, filp->f_pos,
164 next->d_inode->i_ino, 177 next->d_inode->i_ino,
165 dt_type(next->d_inode)) < 0) 178 dt_type(next->d_inode)) < 0)
166 return 0; 179 return 0;
167 spin_lock(&dcache_lock); 180 spin_lock(&dentry->d_lock);
181 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
168 /* next is still alive */ 182 /* next is still alive */
169 list_move(q, p); 183 list_move(q, p);
184 spin_unlock(&next->d_lock);
170 p = q; 185 p = q;
171 filp->f_pos++; 186 filp->f_pos++;
172 } 187 }
173 spin_unlock(&dcache_lock); 188 spin_unlock(&dentry->d_lock);
174 } 189 }
175 return 0; 190 return 0;
176} 191}
@@ -259,23 +274,23 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
259 return 0; 274 return 0;
260} 275}
261 276
262static inline int simple_positive(struct dentry *dentry)
263{
264 return dentry->d_inode && !d_unhashed(dentry);
265}
266
267int simple_empty(struct dentry *dentry) 277int simple_empty(struct dentry *dentry)
268{ 278{
269 struct dentry *child; 279 struct dentry *child;
270 int ret = 0; 280 int ret = 0;
271 281
272 spin_lock(&dcache_lock); 282 spin_lock(&dentry->d_lock);
273 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) 283 list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
274 if (simple_positive(child)) 284 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
285 if (simple_positive(child)) {
286 spin_unlock(&child->d_lock);
275 goto out; 287 goto out;
288 }
289 spin_unlock(&child->d_lock);
290 }
276 ret = 1; 291 ret = 1;
277out: 292out:
278 spin_unlock(&dcache_lock); 293 spin_unlock(&dentry->d_lock);
279 return ret; 294 return ret;
280} 295}
281 296
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index d5bb86866e6c..25509eb28fd7 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,7 +14,6 @@
14#include <linux/sunrpc/clnt.h> 14#include <linux/sunrpc/clnt.h>
15#include <linux/sunrpc/svc.h> 15#include <linux/sunrpc/svc.h>
16#include <linux/lockd/lockd.h> 16#include <linux/lockd/lockd.h>
17#include <linux/smp_lock.h>
18#include <linux/kthread.h> 17#include <linux/kthread.h>
19 18
20#define NLMDBG_FACILITY NLMDBG_CLIENT 19#define NLMDBG_FACILITY NLMDBG_CLIENT
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 47ea1e1925b8..332c54cf75e0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/smp_lock.h>
11#include <linux/slab.h> 10#include <linux/slab.h>
12#include <linux/types.h> 11#include <linux/types.h>
13#include <linux/errno.h> 12#include <linux/errno.h>
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 25e21e4023b2..ed0c59fe23ce 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -124,7 +124,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
124 continue; 124 continue;
125 if (host->h_server != ni->server) 125 if (host->h_server != ni->server)
126 continue; 126 continue;
127 if (ni->server && 127 if (ni->server && ni->src_len != 0 &&
128 !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) 128 !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
129 continue; 129 continue;
130 130
@@ -167,6 +167,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
167 host->h_addrlen = ni->salen; 167 host->h_addrlen = ni->salen;
168 rpc_set_port(nlm_addr(host), 0); 168 rpc_set_port(nlm_addr(host), 0);
169 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); 169 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
170 host->h_srcaddrlen = ni->src_len;
170 host->h_version = ni->version; 171 host->h_version = ni->version;
171 host->h_proto = ni->protocol; 172 host->h_proto = ni->protocol;
172 host->h_rpcclnt = NULL; 173 host->h_rpcclnt = NULL;
@@ -238,9 +239,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
238 const char *hostname, 239 const char *hostname,
239 int noresvport) 240 int noresvport)
240{ 241{
241 const struct sockaddr source = {
242 .sa_family = AF_UNSPEC,
243 };
244 struct nlm_lookup_host_info ni = { 242 struct nlm_lookup_host_info ni = {
245 .server = 0, 243 .server = 0,
246 .sap = sap, 244 .sap = sap,
@@ -249,8 +247,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
249 .version = version, 247 .version = version,
250 .hostname = hostname, 248 .hostname = hostname,
251 .hostname_len = strlen(hostname), 249 .hostname_len = strlen(hostname),
252 .src_sap = &source,
253 .src_len = sizeof(source),
254 .noresvport = noresvport, 250 .noresvport = noresvport,
255 }; 251 };
256 252
@@ -357,7 +353,6 @@ nlm_bind_host(struct nlm_host *host)
357 .protocol = host->h_proto, 353 .protocol = host->h_proto,
358 .address = nlm_addr(host), 354 .address = nlm_addr(host),
359 .addrsize = host->h_addrlen, 355 .addrsize = host->h_addrlen,
360 .saddress = nlm_srcaddr(host),
361 .timeout = &timeparms, 356 .timeout = &timeparms,
362 .servername = host->h_name, 357 .servername = host->h_name,
363 .program = &nlm_program, 358 .program = &nlm_program,
@@ -376,6 +371,8 @@ nlm_bind_host(struct nlm_host *host)
376 args.flags |= RPC_CLNT_CREATE_HARDRTRY; 371 args.flags |= RPC_CLNT_CREATE_HARDRTRY;
377 if (host->h_noresvport) 372 if (host->h_noresvport)
378 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; 373 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
374 if (host->h_srcaddrlen)
375 args.saddress = nlm_srcaddr(host);
379 376
380 clnt = rpc_create(&args); 377 clnt = rpc_create(&args);
381 if (!IS_ERR(clnt)) 378 if (!IS_ERR(clnt))
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a336e832475d..38d261192453 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -9,7 +9,6 @@
9 9
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/smp_lock.h>
13#include <linux/lockd/lockd.h> 12#include <linux/lockd/lockd.h>
14#include <linux/lockd/share.h> 13#include <linux/lockd/share.h>
15 14
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c462d346acbd..ef5659b211e9 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -25,7 +25,6 @@
25#include <linux/errno.h> 25#include <linux/errno.h>
26#include <linux/kernel.h> 26#include <linux/kernel.h>
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/smp_lock.h>
29#include <linux/sunrpc/clnt.h> 28#include <linux/sunrpc/clnt.h>
30#include <linux/sunrpc/svc.h> 29#include <linux/sunrpc/svc.h>
31#include <linux/lockd/nlm.h> 30#include <linux/lockd/nlm.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index c3069f38d602..0caea5310ac3 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -9,7 +9,6 @@
9 9
10#include <linux/types.h> 10#include <linux/types.h>
11#include <linux/time.h> 11#include <linux/time.h>
12#include <linux/smp_lock.h>
13#include <linux/lockd/lockd.h> 12#include <linux/lockd/lockd.h>
14#include <linux/lockd/share.h> 13#include <linux/lockd/share.h>
15 14
diff --git a/fs/locks.c b/fs/locks.c
index 65765cb6afed..08415b2a6d36 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -122,7 +122,6 @@
122#include <linux/module.h> 122#include <linux/module.h>
123#include <linux/security.h> 123#include <linux/security.h>
124#include <linux/slab.h> 124#include <linux/slab.h>
125#include <linux/smp_lock.h>
126#include <linux/syscalls.h> 125#include <linux/syscalls.h>
127#include <linux/time.h> 126#include <linux/time.h>
128#include <linux/rcupdate.h> 127#include <linux/rcupdate.h>
@@ -1390,7 +1389,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
1390 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) 1389 if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
1391 goto out; 1390 goto out;
1392 if ((arg == F_WRLCK) 1391 if ((arg == F_WRLCK)
1393 && ((atomic_read(&dentry->d_count) > 1) 1392 && ((dentry->d_count > 1)
1394 || (atomic_read(&inode->i_count) > 1))) 1393 || (atomic_read(&inode->i_count) > 1)))
1395 goto out; 1394 goto out;
1396 } 1395 }
@@ -1504,9 +1503,8 @@ static int do_fcntl_delete_lease(struct file *filp)
1504 1503
1505static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) 1504static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1506{ 1505{
1507 struct file_lock *fl; 1506 struct file_lock *fl, *ret;
1508 struct fasync_struct *new; 1507 struct fasync_struct *new;
1509 struct inode *inode = filp->f_path.dentry->d_inode;
1510 int error; 1508 int error;
1511 1509
1512 fl = lease_alloc(filp, arg); 1510 fl = lease_alloc(filp, arg);
@@ -1518,13 +1516,16 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1518 locks_free_lock(fl); 1516 locks_free_lock(fl);
1519 return -ENOMEM; 1517 return -ENOMEM;
1520 } 1518 }
1519 ret = fl;
1521 lock_flocks(); 1520 lock_flocks();
1522 error = __vfs_setlease(filp, arg, &fl); 1521 error = __vfs_setlease(filp, arg, &ret);
1523 if (error) { 1522 if (error) {
1524 unlock_flocks(); 1523 unlock_flocks();
1525 locks_free_lock(fl); 1524 locks_free_lock(fl);
1526 goto out_free_fasync; 1525 goto out_free_fasync;
1527 } 1526 }
1527 if (ret != fl)
1528 locks_free_lock(fl);
1528 1529
1529 /* 1530 /*
1530 * fasync_insert_entry() returns the old entry if any. 1531 * fasync_insert_entry() returns the old entry if any.
@@ -1532,17 +1533,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1532 * inserted it into the fasync list. Clear new so that 1533 * inserted it into the fasync list. Clear new so that
1533 * we don't release it here. 1534 * we don't release it here.
1534 */ 1535 */
1535 if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new)) 1536 if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
1536 new = NULL; 1537 new = NULL;
1537 1538
1538 if (error < 0) { 1539 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1539 /* remove lease just inserted by setlease */
1540 fl->fl_type = F_UNLCK | F_INPROGRESS;
1541 fl->fl_break_time = jiffies - 10;
1542 time_out_leases(inode);
1543 } else {
1544 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1545 }
1546 unlock_flocks(); 1540 unlock_flocks();
1547 1541
1548out_free_fasync: 1542out_free_fasync:
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 409dfd65e9a1..f9ddf0c388c8 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -555,9 +555,11 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
555 return __logfs_create(dir, dentry, inode, target, destlen); 555 return __logfs_create(dir, dentry, inode, target, destlen);
556} 556}
557 557
558static int logfs_permission(struct inode *inode, int mask) 558static int logfs_permission(struct inode *inode, int mask, unsigned int flags)
559{ 559{
560 return generic_permission(inode, mask, NULL); 560 if (flags & IPERM_FLAG_RCU)
561 return -ECHILD;
562 return generic_permission(inode, mask, flags, NULL);
561} 563}
562 564
563static int logfs_link(struct dentry *old_dentry, struct inode *dir, 565static int logfs_link(struct dentry *old_dentry, struct inode *dir,
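
logfs takes the simplest correct route for the new ->permission(inode, mask, flags) signature: refuse rcu-walk outright, let the VFS call back in ref-walk mode, and defer to generic_permission() with the extra flags argument. A hedged sketch of that minimal form (myfs_permission is an invented name); a filesystem whose check never sleeps could instead keep servicing the request under rcu-walk:

#include <linux/errno.h>
#include <linux/fs.h>

static int myfs_permission(struct inode *inode, int mask, unsigned int flags)
{
	/*
	 * Anything that might sleep (reading ACL blocks, contacting a
	 * server, ...) is forbidden under rcu-walk; returning -ECHILD
	 * makes the VFS retry this permission check in ref-walk mode.
	 */
	if (flags & IPERM_FLAG_RCU)
		return -ECHILD;

	/* ref-walk: plain POSIX mode-bit checking, no private ACL callback */
	return generic_permission(inode, mask, flags, NULL);
}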
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index d8c71ece098f..03b8c240aeda 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -141,13 +141,20 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
141 return __logfs_iget(sb, ino); 141 return __logfs_iget(sb, ino);
142} 142}
143 143
144static void logfs_i_callback(struct rcu_head *head)
145{
146 struct inode *inode = container_of(head, struct inode, i_rcu);
147 INIT_LIST_HEAD(&inode->i_dentry);
148 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
149}
150
144static void __logfs_destroy_inode(struct inode *inode) 151static void __logfs_destroy_inode(struct inode *inode)
145{ 152{
146 struct logfs_inode *li = logfs_inode(inode); 153 struct logfs_inode *li = logfs_inode(inode);
147 154
148 BUG_ON(li->li_block); 155 BUG_ON(li->li_block);
149 list_del(&li->li_freeing_list); 156 list_del(&li->li_freeing_list);
150 kmem_cache_free(logfs_inode_cache, li); 157 call_rcu(&inode->i_rcu, logfs_i_callback);
151} 158}
152 159
153static void logfs_destroy_inode(struct inode *inode) 160static void logfs_destroy_inode(struct inode *inode)
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index f46ee8b0e135..9da29706f91c 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
828 super->s_journal_seg[i] = segno; 828 super->s_journal_seg[i] = segno;
829 super->s_journal_ec[i] = ec; 829 super->s_journal_ec[i] = ec;
830 logfs_set_segment_reserved(sb, segno); 830 logfs_set_segment_reserved(sb, segno);
831 err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); 831 err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
832 BUG_ON(err); /* mempool should prevent this */ 832 BUG_ON(err); /* mempool should prevent this */
833 err = logfs_erase_segment(sb, segno, 1); 833 err = logfs_erase_segment(sb, segno, 1);
834 BUG_ON(err); /* FIXME: remount-ro would be nicer */ 834 BUG_ON(err); /* FIXME: remount-ro would be nicer */
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf0e188..ee99a9f5dfd3 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
1994 1994
1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */ 1995 /* FIXME: transaction is part of logfs_block now. Is that enough? */
1996 err = logfs_write_buf(master_inode, page, 0); 1996 err = logfs_write_buf(master_inode, page, 0);
1997 if (err)
1998 move_page_to_inode(inode, page);
1999
1997 logfs_put_write_page(page); 2000 logfs_put_write_page(page);
1998 return err; 2001 return err;
1999} 2002}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index fb2020858a34..ae0b83f476a6 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,11 +68,18 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
68 return &ei->vfs_inode; 68 return &ei->vfs_inode;
69} 69}
70 70
71static void minix_destroy_inode(struct inode *inode) 71static void minix_i_callback(struct rcu_head *head)
72{ 72{
73 struct inode *inode = container_of(head, struct inode, i_rcu);
74 INIT_LIST_HEAD(&inode->i_dentry);
73 kmem_cache_free(minix_inode_cachep, minix_i(inode)); 75 kmem_cache_free(minix_inode_cachep, minix_i(inode));
74} 76}
75 77
78static void minix_destroy_inode(struct inode *inode)
79{
80 call_rcu(&inode->i_rcu, minix_i_callback);
81}
82
76static void init_once(void *foo) 83static void init_once(void *foo)
77{ 84{
78 struct minix_inode_info *ei = (struct minix_inode_info *) foo; 85 struct minix_inode_info *ei = (struct minix_inode_info *) foo;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index c0d35a3accef..1b9e07728a9f 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -23,7 +23,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
23 struct inode * inode = NULL; 23 struct inode * inode = NULL;
24 ino_t ino; 24 ino_t ino;
25 25
26 dentry->d_op = dir->i_sb->s_root->d_op; 26 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
27 27
28 if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen) 28 if (dentry->d_name.len > minix_sb(dir->i_sb)->s_namelen)
29 return ERR_PTR(-ENAMETOOLONG); 29 return ERR_PTR(-ENAMETOOLONG);
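
All of the open-coded dentry->d_op assignments in this series (isofs, jfs, minix, libfs) become d_set_d_op() calls, which besides setting the pointer let the dcache note in the dentry's flags which operations exist, so the lookup fast path can test flags instead of dereferencing d_op. In a ->lookup the converted pattern looks roughly like this (the myfs_* names are placeholders):

#include <linux/dcache.h>
#include <linux/fs.h>
#include <linux/namei.h>

extern const struct dentry_operations myfs_dentry_ops;	/* placeholder */

static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	struct inode *inode = NULL;

	d_set_d_op(dentry, &myfs_dentry_ops);	/* was: dentry->d_op = &myfs_dentry_ops */

	/* ... resolve dentry->d_name in @dir, setting @inode if the name exists ... */

	return d_splice_alias(inode, dentry);
}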
diff --git a/fs/namei.c b/fs/namei.c
index 5362af9b7372..19433cdba011 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL(putname);
169/* 169/*
170 * This does basic POSIX ACL permission checking 170 * This does basic POSIX ACL permission checking
171 */ 171 */
172static int acl_permission_check(struct inode *inode, int mask, 172static int acl_permission_check(struct inode *inode, int mask, unsigned int flags,
173 int (*check_acl)(struct inode *inode, int mask)) 173 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
174{ 174{
175 umode_t mode = inode->i_mode; 175 umode_t mode = inode->i_mode;
176 176
@@ -180,7 +180,7 @@ static int acl_permission_check(struct inode *inode, int mask,
180 mode >>= 6; 180 mode >>= 6;
181 else { 181 else {
182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 182 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
183 int error = check_acl(inode, mask); 183 int error = check_acl(inode, mask, flags);
184 if (error != -EAGAIN) 184 if (error != -EAGAIN)
185 return error; 185 return error;
186 } 186 }
@@ -198,25 +198,30 @@ static int acl_permission_check(struct inode *inode, int mask,
198} 198}
199 199
200/** 200/**
201 * generic_permission - check for access rights on a Posix-like filesystem 201 * generic_permission - check for access rights on a Posix-like filesystem
202 * @inode: inode to check access rights for 202 * @inode: inode to check access rights for
203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 203 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
204 * @check_acl: optional callback to check for Posix ACLs 204 * @check_acl: optional callback to check for Posix ACLs
205 * @flags IPERM_FLAG_ flags.
205 * 206 *
206 * Used to check for read/write/execute permissions on a file. 207 * Used to check for read/write/execute permissions on a file.
207 * We use "fsuid" for this, letting us set arbitrary permissions 208 * We use "fsuid" for this, letting us set arbitrary permissions
208 * for filesystem access without changing the "normal" uids which 209 * for filesystem access without changing the "normal" uids which
209 * are used for other things.. 210 * are used for other things.
211 *
212 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
 213 * request cannot be satisfied (e.g. requires blocking or too much complexity).
214 * It would then be called again in ref-walk mode.
210 */ 215 */
211int generic_permission(struct inode *inode, int mask, 216int generic_permission(struct inode *inode, int mask, unsigned int flags,
212 int (*check_acl)(struct inode *inode, int mask)) 217 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
213{ 218{
214 int ret; 219 int ret;
215 220
216 /* 221 /*
217 * Do the basic POSIX ACL permission checks. 222 * Do the basic POSIX ACL permission checks.
218 */ 223 */
219 ret = acl_permission_check(inode, mask, check_acl); 224 ret = acl_permission_check(inode, mask, flags, check_acl);
220 if (ret != -EACCES) 225 if (ret != -EACCES)
221 return ret; 226 return ret;
222 227
@@ -271,9 +276,10 @@ int inode_permission(struct inode *inode, int mask)
271 } 276 }
272 277
273 if (inode->i_op->permission) 278 if (inode->i_op->permission)
274 retval = inode->i_op->permission(inode, mask); 279 retval = inode->i_op->permission(inode, mask, 0);
275 else 280 else
276 retval = generic_permission(inode, mask, inode->i_op->check_acl); 281 retval = generic_permission(inode, mask, 0,
282 inode->i_op->check_acl);
277 283
278 if (retval) 284 if (retval)
279 return retval; 285 return retval;
@@ -362,6 +368,18 @@ void path_get(struct path *path)
362EXPORT_SYMBOL(path_get); 368EXPORT_SYMBOL(path_get);
363 369
364/** 370/**
371 * path_get_long - get a long reference to a path
372 * @path: path to get the reference to
373 *
374 * Given a path increment the reference count to the dentry and the vfsmount.
375 */
376void path_get_long(struct path *path)
377{
378 mntget_long(path->mnt);
379 dget(path->dentry);
380}
381
382/**
365 * path_put - put a reference to a path 383 * path_put - put a reference to a path
366 * @path: path to put the reference to 384 * @path: path to put the reference to
367 * 385 *
@@ -375,6 +393,185 @@ void path_put(struct path *path)
375EXPORT_SYMBOL(path_put); 393EXPORT_SYMBOL(path_put);
376 394
377/** 395/**
396 * path_put_long - put a long reference to a path
397 * @path: path to put the reference to
398 *
399 * Given a path decrement the reference count to the dentry and the vfsmount.
400 */
401void path_put_long(struct path *path)
402{
403 dput(path->dentry);
404 mntput_long(path->mnt);
405}
406
407/**
408 * nameidata_drop_rcu - drop this nameidata out of rcu-walk
409 * @nd: nameidata pathwalk data to drop
 410 * @Returns: 0 on success, -ECHILD on failure
411 *
412 * Path walking has 2 modes, rcu-walk and ref-walk (see
413 * Documentation/filesystems/path-lookup.txt). __drop_rcu* functions attempt
414 * to drop out of rcu-walk mode and take normal reference counts on dentries
415 * and vfsmounts to transition to rcu-walk mode. __drop_rcu* functions take
416 * refcounts at the last known good point before rcu-walk got stuck, so
417 * ref-walk may continue from there. If this is not successful (eg. a seqcount
418 * has changed), then failure is returned and path walk restarts from the
419 * beginning in ref-walk mode.
420 *
421 * nameidata_drop_rcu attempts to drop the current nd->path and nd->root into
422 * ref-walk. Must be called from rcu-walk context.
423 */
424static int nameidata_drop_rcu(struct nameidata *nd)
425{
426 struct fs_struct *fs = current->fs;
427 struct dentry *dentry = nd->path.dentry;
428
429 BUG_ON(!(nd->flags & LOOKUP_RCU));
430 if (nd->root.mnt) {
431 spin_lock(&fs->lock);
432 if (nd->root.mnt != fs->root.mnt ||
433 nd->root.dentry != fs->root.dentry)
434 goto err_root;
435 }
436 spin_lock(&dentry->d_lock);
437 if (!__d_rcu_to_refcount(dentry, nd->seq))
438 goto err;
439 BUG_ON(nd->inode != dentry->d_inode);
440 spin_unlock(&dentry->d_lock);
441 if (nd->root.mnt) {
442 path_get(&nd->root);
443 spin_unlock(&fs->lock);
444 }
445 mntget(nd->path.mnt);
446
447 rcu_read_unlock();
448 br_read_unlock(vfsmount_lock);
449 nd->flags &= ~LOOKUP_RCU;
450 return 0;
451err:
452 spin_unlock(&dentry->d_lock);
453err_root:
454 if (nd->root.mnt)
455 spin_unlock(&fs->lock);
456 return -ECHILD;
457}
458
459/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
460static inline int nameidata_drop_rcu_maybe(struct nameidata *nd)
461{
462 if (nd->flags & LOOKUP_RCU)
463 return nameidata_drop_rcu(nd);
464 return 0;
465}
466
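A note on the pattern above: nameidata_drop_rcu() validates the sampled sequence number under the dentry lock, takes a real reference, and reports -ECHILD so the caller restarts in ref-walk if validation fails. The upgrade step can be modelled outside the kernel; the following is only a minimal user-space sketch of that shape, using a pthread mutex in place of d_lock and a plain counter in place of d_seq (all names here are hypothetical, not the kernel API):

    #include <pthread.h>
    #include <stdio.h>

    /* Hypothetical stand-in for a dentry: a lock, a change counter, a refcount. */
    struct node {
            pthread_mutex_t lock;   /* plays the role of d_lock     */
            unsigned int seq;       /* bumped on every modification */
            int refcount;           /* "ref-walk" style reference   */
    };

    /*
     * Upgrade a lockless traversal to a counted reference.  Returns 0 on
     * success; -1 means the node changed since @seen was sampled and the
     * caller must restart in the slow, reference-counted mode.
     */
    static int upgrade_to_ref(struct node *n, unsigned int seen)
    {
            int ret = -1;

            pthread_mutex_lock(&n->lock);
            if (n->seq == seen) {           /* nothing changed underneath us */
                    n->refcount++;
                    ret = 0;
            }
            pthread_mutex_unlock(&n->lock);
            return ret;
    }

    int main(void)
    {
            struct node n = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };
            unsigned int seen = n.seq;      /* sampled during the "fast" walk */

            if (upgrade_to_ref(&n, seen))
                    puts("restart in slow mode");
            else
                    printf("upgraded, refcount=%d\n", n.refcount);
            return 0;
    }

The point of doing the check and the increment under one lock is that the upgrade is atomic with respect to concurrent modification, which is what the kernel code gets from holding d_lock.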
467/**
468 * nameidata_dentry_drop_rcu - drop nameidata and dentry out of rcu-walk
469 * @nd: nameidata pathwalk data to drop
470 * @dentry: dentry to drop
471 * @Returns: 0 on success, -ECHILD on failure
472 *
473 * nameidata_dentry_drop_rcu attempts to drop the current nd->path and nd->root,
474 * and dentry into ref-walk. @dentry must be a path found by a do_lookup call on
475 * @nd. Must be called from rcu-walk context.
476 */
477static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry)
478{
479 struct fs_struct *fs = current->fs;
480 struct dentry *parent = nd->path.dentry;
481
482 BUG_ON(!(nd->flags & LOOKUP_RCU));
483 if (nd->root.mnt) {
484 spin_lock(&fs->lock);
485 if (nd->root.mnt != fs->root.mnt ||
486 nd->root.dentry != fs->root.dentry)
487 goto err_root;
488 }
489 spin_lock(&parent->d_lock);
490 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
491 if (!__d_rcu_to_refcount(dentry, nd->seq))
492 goto err;
493 /*
494 * If the sequence check on the child dentry passed, then the child has
495 * not been removed from its parent. This means the parent dentry must
496 * be valid and able to take a reference at this point.
497 */
498 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
499 BUG_ON(!parent->d_count);
500 parent->d_count++;
501 spin_unlock(&dentry->d_lock);
502 spin_unlock(&parent->d_lock);
503 if (nd->root.mnt) {
504 path_get(&nd->root);
505 spin_unlock(&fs->lock);
506 }
507 mntget(nd->path.mnt);
508
509 rcu_read_unlock();
510 br_read_unlock(vfsmount_lock);
511 nd->flags &= ~LOOKUP_RCU;
512 return 0;
513err:
514 spin_unlock(&dentry->d_lock);
515 spin_unlock(&parent->d_lock);
516err_root:
517 if (nd->root.mnt)
518 spin_unlock(&fs->lock);
519 return -ECHILD;
520}
521
522/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
523static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
524{
525 if (nd->flags & LOOKUP_RCU)
526 return nameidata_dentry_drop_rcu(nd, dentry);
527 return 0;
528}
529
530/**
531 * nameidata_drop_rcu_last - drop nameidata ending path walk out of rcu-walk
532 * @nd: nameidata pathwalk data to drop
533 * @Returns: 0 on success, -ECHILD on failure
534 *
535 * nameidata_drop_rcu_last attempts to drop the current nd->path into ref-walk.
536 * nd->path should be the final element of the lookup, so nd->root is discarded.
537 * Must be called from rcu-walk context.
538 */
539static int nameidata_drop_rcu_last(struct nameidata *nd)
540{
541 struct dentry *dentry = nd->path.dentry;
542
543 BUG_ON(!(nd->flags & LOOKUP_RCU));
544 nd->flags &= ~LOOKUP_RCU;
545 nd->root.mnt = NULL;
546 spin_lock(&dentry->d_lock);
547 if (!__d_rcu_to_refcount(dentry, nd->seq))
548 goto err_unlock;
549 BUG_ON(nd->inode != dentry->d_inode);
550 spin_unlock(&dentry->d_lock);
551
552 mntget(nd->path.mnt);
553
554 rcu_read_unlock();
555 br_read_unlock(vfsmount_lock);
556
557 return 0;
558
559err_unlock:
560 spin_unlock(&dentry->d_lock);
561 rcu_read_unlock();
562 br_read_unlock(vfsmount_lock);
563 return -ECHILD;
564}
565
566/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
567static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
568{
569 if (likely(nd->flags & LOOKUP_RCU))
570 return nameidata_drop_rcu_last(nd);
571 return 0;
572}
573
574/**
378 * release_open_intent - free up open intent resources 575 * release_open_intent - free up open intent resources
379 * @nd: pointer to nameidata 576 * @nd: pointer to nameidata
380 */ 577 */
@@ -386,10 +583,26 @@ void release_open_intent(struct nameidata *nd)
386 fput(nd->intent.open.file); 583 fput(nd->intent.open.file);
387} 584}
388 585
586static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
587{
588 int status;
589
590 status = dentry->d_op->d_revalidate(dentry, nd);
591 if (status == -ECHILD) {
592 if (nameidata_dentry_drop_rcu(nd, dentry))
593 return status;
594 status = dentry->d_op->d_revalidate(dentry, nd);
595 }
596
597 return status;
598}
599
389static inline struct dentry * 600static inline struct dentry *
390do_revalidate(struct dentry *dentry, struct nameidata *nd) 601do_revalidate(struct dentry *dentry, struct nameidata *nd)
391{ 602{
392 int status = dentry->d_op->d_revalidate(dentry, nd); 603 int status;
604
605 status = d_revalidate(dentry, nd);
393 if (unlikely(status <= 0)) { 606 if (unlikely(status <= 0)) {
394 /* 607 /*
395 * The dentry failed validation. 608 * The dentry failed validation.
@@ -397,19 +610,36 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
397 * the dentry otherwise d_revalidate is asking us 610 * the dentry otherwise d_revalidate is asking us
398 * to return a fail status. 611 * to return a fail status.
399 */ 612 */
400 if (!status) { 613 if (status < 0) {
614 /* If we're in rcu-walk, we don't have a ref */
615 if (!(nd->flags & LOOKUP_RCU))
616 dput(dentry);
617 dentry = ERR_PTR(status);
618
619 } else {
620 /* Don't d_invalidate in rcu-walk mode */
621 if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
622 return ERR_PTR(-ECHILD);
401 if (!d_invalidate(dentry)) { 623 if (!d_invalidate(dentry)) {
402 dput(dentry); 624 dput(dentry);
403 dentry = NULL; 625 dentry = NULL;
404 } 626 }
405 } else {
406 dput(dentry);
407 dentry = ERR_PTR(status);
408 } 627 }
409 } 628 }
410 return dentry; 629 return dentry;
411} 630}
412 631
632static inline int need_reval_dot(struct dentry *dentry)
633{
634 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
635 return 0;
636
637 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
638 return 0;
639
640 return 1;
641}
642
413/* 643/*
414 * force_reval_path - force revalidation of a dentry 644 * force_reval_path - force revalidation of a dentry
415 * 645 *
@@ -433,13 +663,12 @@ force_reval_path(struct path *path, struct nameidata *nd)
433 663
434 /* 664 /*
435 * only check on filesystems where it's possible for the dentry to 665 * only check on filesystems where it's possible for the dentry to
436 * become stale. It's assumed that if this flag is set then the 666 * become stale.
437 * d_revalidate op will also be defined.
438 */ 667 */
439 if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) 668 if (!need_reval_dot(dentry))
440 return 0; 669 return 0;
441 670
442 status = dentry->d_op->d_revalidate(dentry, nd); 671 status = d_revalidate(dentry, nd);
443 if (status > 0) 672 if (status > 0)
444 return 0; 673 return 0;
445 674
@@ -459,26 +688,27 @@ force_reval_path(struct path *path, struct nameidata *nd)
459 * short-cut DAC fails, then call ->permission() to do more 688 * short-cut DAC fails, then call ->permission() to do more
460 * complete permission check. 689 * complete permission check.
461 */ 690 */
462static int exec_permission(struct inode *inode) 691static inline int exec_permission(struct inode *inode, unsigned int flags)
463{ 692{
464 int ret; 693 int ret;
465 694
466 if (inode->i_op->permission) { 695 if (inode->i_op->permission) {
467 ret = inode->i_op->permission(inode, MAY_EXEC); 696 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
468 if (!ret) 697 } else {
469 goto ok; 698 ret = acl_permission_check(inode, MAY_EXEC, flags,
470 return ret; 699 inode->i_op->check_acl);
471 } 700 }
472 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl); 701 if (likely(!ret))
473 if (!ret)
474 goto ok; 702 goto ok;
703 if (ret == -ECHILD)
704 return ret;
475 705
476 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 706 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
477 goto ok; 707 goto ok;
478 708
479 return ret; 709 return ret;
480ok: 710ok:
481 return security_inode_permission(inode, MAY_EXEC); 711 return security_inode_exec_permission(inode, flags);
482} 712}
483 713
484static __always_inline void set_root(struct nameidata *nd) 714static __always_inline void set_root(struct nameidata *nd)
@@ -489,8 +719,23 @@ static __always_inline void set_root(struct nameidata *nd)
489 719
490static int link_path_walk(const char *, struct nameidata *); 720static int link_path_walk(const char *, struct nameidata *);
491 721
722static __always_inline void set_root_rcu(struct nameidata *nd)
723{
724 if (!nd->root.mnt) {
725 struct fs_struct *fs = current->fs;
726 unsigned seq;
727
728 do {
729 seq = read_seqcount_begin(&fs->seq);
730 nd->root = fs->root;
731 } while (read_seqcount_retry(&fs->seq, seq));
732 }
733}
734
492static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link) 735static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
493{ 736{
737 int ret;
738
494 if (IS_ERR(link)) 739 if (IS_ERR(link))
495 goto fail; 740 goto fail;
496 741
@@ -500,8 +745,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
500 nd->path = nd->root; 745 nd->path = nd->root;
501 path_get(&nd->root); 746 path_get(&nd->root);
502 } 747 }
748 nd->inode = nd->path.dentry->d_inode;
503 749
504 return link_path_walk(link, nd); 750 ret = link_path_walk(link, nd);
751 return ret;
505fail: 752fail:
506 path_put(&nd->path); 753 path_put(&nd->path);
507 return PTR_ERR(link); 754 return PTR_ERR(link);
@@ -516,11 +763,12 @@ static void path_put_conditional(struct path *path, struct nameidata *nd)
516 763
517static inline void path_to_nameidata(struct path *path, struct nameidata *nd) 764static inline void path_to_nameidata(struct path *path, struct nameidata *nd)
518{ 765{
519 dput(nd->path.dentry); 766 if (!(nd->flags & LOOKUP_RCU)) {
520 if (nd->path.mnt != path->mnt) { 767 dput(nd->path.dentry);
521 mntput(nd->path.mnt); 768 if (nd->path.mnt != path->mnt)
522 nd->path.mnt = path->mnt; 769 mntput(nd->path.mnt);
523 } 770 }
771 nd->path.mnt = path->mnt;
524 nd->path.dentry = path->dentry; 772 nd->path.dentry = path->dentry;
525} 773}
526 774
@@ -535,9 +783,11 @@ __do_follow_link(struct path *path, struct nameidata *nd, void **p)
535 783
536 if (path->mnt != nd->path.mnt) { 784 if (path->mnt != nd->path.mnt) {
537 path_to_nameidata(path, nd); 785 path_to_nameidata(path, nd);
786 nd->inode = nd->path.dentry->d_inode;
538 dget(dentry); 787 dget(dentry);
539 } 788 }
540 mntget(path->mnt); 789 mntget(path->mnt);
790
541 nd->last_type = LAST_BIND; 791 nd->last_type = LAST_BIND;
542 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 792 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
543 error = PTR_ERR(*p); 793 error = PTR_ERR(*p);
@@ -591,6 +841,20 @@ loop:
591 return err; 841 return err;
592} 842}
593 843
844static int follow_up_rcu(struct path *path)
845{
846 struct vfsmount *parent;
847 struct dentry *mountpoint;
848
849 parent = path->mnt->mnt_parent;
850 if (parent == path->mnt)
851 return 0;
852 mountpoint = path->mnt->mnt_mountpoint;
853 path->dentry = mountpoint;
854 path->mnt = parent;
855 return 1;
856}
857
594int follow_up(struct path *path) 858int follow_up(struct path *path)
595{ 859{
596 struct vfsmount *parent; 860 struct vfsmount *parent;
@@ -612,9 +876,24 @@ int follow_up(struct path *path)
612 return 1; 876 return 1;
613} 877}
614 878
615/* no need for dcache_lock, as serialization is taken care in 879/*
616 * namespace.c 880 * serialization is taken care of in namespace.c
617 */ 881 */
882static void __follow_mount_rcu(struct nameidata *nd, struct path *path,
883 struct inode **inode)
884{
885 while (d_mountpoint(path->dentry)) {
886 struct vfsmount *mounted;
887 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
888 if (!mounted)
889 return;
890 path->mnt = mounted;
891 path->dentry = mounted->mnt_root;
892 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
893 *inode = path->dentry->d_inode;
894 }
895}
896
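The loop in __follow_mount_rcu() keeps hopping from a mountpoint dentry to the root of whatever is mounted on it until nothing further is mounted there. The shape of that descent is easy to model in isolation; below is a small sketch with a flat lookup table standing in for the mount hash (hypothetical types and names, not the kernel structures):

    #include <stddef.h>
    #include <stdio.h>

    struct node;

    /* Hypothetical "mount": something mounted on top of a node. */
    struct mount {
            struct node *mountpoint;        /* node it is mounted on  */
            struct node *root;              /* node it exposes on top */
    };

    struct node {
            const char *name;
            int mounted;                    /* like DCACHE_MOUNTED    */
    };

    /* Toy mount table standing in for the kernel's mount hash. */
    static struct mount *lookup_mount(struct mount *tbl, size_t n, struct node *at)
    {
            for (size_t i = 0; i < n; i++)
                    if (tbl[i].mountpoint == at)
                            return &tbl[i];
            return NULL;
    }

    /* Follow stacked mounts: keep descending while something is mounted here. */
    static struct node *follow_mounts(struct mount *tbl, size_t n, struct node *at)
    {
            while (at->mounted) {
                    struct mount *m = lookup_mount(tbl, n, at);
                    if (!m)
                            break;
                    at = m->root;
            }
            return at;
    }

    int main(void)
    {
            struct node a = { "mntpoint", 1 }, b = { "fs-root", 0 };
            struct mount tbl[] = { { &a, &b } };

            printf("%s\n", follow_mounts(tbl, 1, &a)->name);  /* prints fs-root */
            return 0;
    }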
618static int __follow_mount(struct path *path) 897static int __follow_mount(struct path *path)
619{ 898{
620 int res = 0; 899 int res = 0;
@@ -645,9 +924,6 @@ static void follow_mount(struct path *path)
645 } 924 }
646} 925}
647 926
648/* no need for dcache_lock, as serialization is taken care in
649 * namespace.c
650 */
651int follow_down(struct path *path) 927int follow_down(struct path *path)
652{ 928{
653 struct vfsmount *mounted; 929 struct vfsmount *mounted;
@@ -663,7 +939,42 @@ int follow_down(struct path *path)
663 return 0; 939 return 0;
664} 940}
665 941
666static __always_inline void follow_dotdot(struct nameidata *nd) 942static int follow_dotdot_rcu(struct nameidata *nd)
943{
944 struct inode *inode = nd->inode;
945
946 set_root_rcu(nd);
947
948 while (1) {
949 if (nd->path.dentry == nd->root.dentry &&
950 nd->path.mnt == nd->root.mnt) {
951 break;
952 }
953 if (nd->path.dentry != nd->path.mnt->mnt_root) {
954 struct dentry *old = nd->path.dentry;
955 struct dentry *parent = old->d_parent;
956 unsigned seq;
957
958 seq = read_seqcount_begin(&parent->d_seq);
959 if (read_seqcount_retry(&old->d_seq, nd->seq))
960 return -ECHILD;
961 inode = parent->d_inode;
962 nd->path.dentry = parent;
963 nd->seq = seq;
964 break;
965 }
966 if (!follow_up_rcu(&nd->path))
967 break;
968 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
969 inode = nd->path.dentry->d_inode;
970 }
971 __follow_mount_rcu(nd, &nd->path, &inode);
972 nd->inode = inode;
973
974 return 0;
975}
976
977static void follow_dotdot(struct nameidata *nd)
667{ 978{
668 set_root(nd); 979 set_root(nd);
669 980
@@ -684,6 +995,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
684 break; 995 break;
685 } 996 }
686 follow_mount(&nd->path); 997 follow_mount(&nd->path);
998 nd->inode = nd->path.dentry->d_inode;
687} 999}
688 1000
689/* 1001/*
@@ -721,17 +1033,17 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
721 * It _is_ time-critical. 1033 * It _is_ time-critical.
722 */ 1034 */
723static int do_lookup(struct nameidata *nd, struct qstr *name, 1035static int do_lookup(struct nameidata *nd, struct qstr *name,
724 struct path *path) 1036 struct path *path, struct inode **inode)
725{ 1037{
726 struct vfsmount *mnt = nd->path.mnt; 1038 struct vfsmount *mnt = nd->path.mnt;
727 struct dentry *dentry, *parent; 1039 struct dentry *dentry, *parent = nd->path.dentry;
728 struct inode *dir; 1040 struct inode *dir;
729 /* 1041 /*
730 * See if the low-level filesystem might want 1042 * See if the low-level filesystem might want
731 * to use its own hash.. 1043 * to use its own hash..
732 */ 1044 */
733 if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) { 1045 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
734 int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name); 1046 int err = parent->d_op->d_hash(parent, nd->inode, name);
735 if (err < 0) 1047 if (err < 0)
736 return err; 1048 return err;
737 } 1049 }
@@ -741,21 +1053,44 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
741 * of a false negative due to a concurrent rename, we're going to 1053 * of a false negative due to a concurrent rename, we're going to
742 * do the non-racy lookup, below. 1054 * do the non-racy lookup, below.
743 */ 1055 */
744 dentry = __d_lookup(nd->path.dentry, name); 1056 if (nd->flags & LOOKUP_RCU) {
745 if (!dentry) 1057 unsigned seq;
746 goto need_lookup; 1058
1059 *inode = nd->inode;
1060 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1061 if (!dentry) {
1062 if (nameidata_drop_rcu(nd))
1063 return -ECHILD;
1064 goto need_lookup;
1065 }
1066 /* Memory barrier in read_seqcount_begin of child is enough */
1067 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1068 return -ECHILD;
1069
1070 nd->seq = seq;
1071 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
1072 goto need_revalidate;
1073 path->mnt = mnt;
1074 path->dentry = dentry;
1075 __follow_mount_rcu(nd, path, inode);
1076 } else {
1077 dentry = __d_lookup(parent, name);
1078 if (!dentry)
1079 goto need_lookup;
747found: 1080found:
748 if (dentry->d_op && dentry->d_op->d_revalidate) 1081 if (dentry->d_flags & DCACHE_OP_REVALIDATE)
749 goto need_revalidate; 1082 goto need_revalidate;
750done: 1083done:
751 path->mnt = mnt; 1084 path->mnt = mnt;
752 path->dentry = dentry; 1085 path->dentry = dentry;
753 __follow_mount(path); 1086 __follow_mount(path);
1087 *inode = path->dentry->d_inode;
1088 }
754 return 0; 1089 return 0;
755 1090
756need_lookup: 1091need_lookup:
757 parent = nd->path.dentry;
758 dir = parent->d_inode; 1092 dir = parent->d_inode;
1093 BUG_ON(nd->inode != dir);
759 1094
760 mutex_lock(&dir->i_mutex); 1095 mutex_lock(&dir->i_mutex);
761 /* 1096 /*
@@ -817,7 +1152,6 @@ static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
817static int link_path_walk(const char *name, struct nameidata *nd) 1152static int link_path_walk(const char *name, struct nameidata *nd)
818{ 1153{
819 struct path next; 1154 struct path next;
820 struct inode *inode;
821 int err; 1155 int err;
822 unsigned int lookup_flags = nd->flags; 1156 unsigned int lookup_flags = nd->flags;
823 1157
@@ -826,18 +1160,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
826 if (!*name) 1160 if (!*name)
827 goto return_reval; 1161 goto return_reval;
828 1162
829 inode = nd->path.dentry->d_inode;
830 if (nd->depth) 1163 if (nd->depth)
831 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); 1164 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
832 1165
833 /* At this point we know we have a real path component. */ 1166 /* At this point we know we have a real path component. */
834 for(;;) { 1167 for(;;) {
1168 struct inode *inode;
835 unsigned long hash; 1169 unsigned long hash;
836 struct qstr this; 1170 struct qstr this;
837 unsigned int c; 1171 unsigned int c;
838 1172
839 nd->flags |= LOOKUP_CONTINUE; 1173 nd->flags |= LOOKUP_CONTINUE;
840 err = exec_permission(inode); 1174 if (nd->flags & LOOKUP_RCU) {
1175 err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1176 if (err == -ECHILD) {
1177 if (nameidata_drop_rcu(nd))
1178 return -ECHILD;
1179 goto exec_again;
1180 }
1181 } else {
1182exec_again:
1183 err = exec_permission(nd->inode, 0);
1184 }
841 if (err) 1185 if (err)
842 break; 1186 break;
843 1187
@@ -868,37 +1212,44 @@ static int link_path_walk(const char *name, struct nameidata *nd)
868 if (this.name[0] == '.') switch (this.len) { 1212 if (this.name[0] == '.') switch (this.len) {
869 default: 1213 default:
870 break; 1214 break;
871 case 2: 1215 case 2:
872 if (this.name[1] != '.') 1216 if (this.name[1] != '.')
873 break; 1217 break;
874 follow_dotdot(nd); 1218 if (nd->flags & LOOKUP_RCU) {
875 inode = nd->path.dentry->d_inode; 1219 if (follow_dotdot_rcu(nd))
1220 return -ECHILD;
1221 } else
1222 follow_dotdot(nd);
876 /* fallthrough */ 1223 /* fallthrough */
877 case 1: 1224 case 1:
878 continue; 1225 continue;
879 } 1226 }
880 /* This does the actual lookups.. */ 1227 /* This does the actual lookups.. */
881 err = do_lookup(nd, &this, &next); 1228 err = do_lookup(nd, &this, &next, &inode);
882 if (err) 1229 if (err)
883 break; 1230 break;
884
885 err = -ENOENT; 1231 err = -ENOENT;
886 inode = next.dentry->d_inode;
887 if (!inode) 1232 if (!inode)
888 goto out_dput; 1233 goto out_dput;
889 1234
890 if (inode->i_op->follow_link) { 1235 if (inode->i_op->follow_link) {
1236 /* We commonly drop rcu-walk here */
1237 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1238 return -ECHILD;
1239 BUG_ON(inode != next.dentry->d_inode);
891 err = do_follow_link(&next, nd); 1240 err = do_follow_link(&next, nd);
892 if (err) 1241 if (err)
893 goto return_err; 1242 goto return_err;
1243 nd->inode = nd->path.dentry->d_inode;
894 err = -ENOENT; 1244 err = -ENOENT;
895 inode = nd->path.dentry->d_inode; 1245 if (!nd->inode)
896 if (!inode)
897 break; 1246 break;
898 } else 1247 } else {
899 path_to_nameidata(&next, nd); 1248 path_to_nameidata(&next, nd);
1249 nd->inode = inode;
1250 }
900 err = -ENOTDIR; 1251 err = -ENOTDIR;
901 if (!inode->i_op->lookup) 1252 if (!nd->inode->i_op->lookup)
902 break; 1253 break;
903 continue; 1254 continue;
904 /* here ends the main loop */ 1255 /* here ends the main loop */
@@ -913,32 +1264,39 @@ last_component:
913 if (this.name[0] == '.') switch (this.len) { 1264 if (this.name[0] == '.') switch (this.len) {
914 default: 1265 default:
915 break; 1266 break;
916 case 2: 1267 case 2:
917 if (this.name[1] != '.') 1268 if (this.name[1] != '.')
918 break; 1269 break;
919 follow_dotdot(nd); 1270 if (nd->flags & LOOKUP_RCU) {
920 inode = nd->path.dentry->d_inode; 1271 if (follow_dotdot_rcu(nd))
1272 return -ECHILD;
1273 } else
1274 follow_dotdot(nd);
921 /* fallthrough */ 1275 /* fallthrough */
922 case 1: 1276 case 1:
923 goto return_reval; 1277 goto return_reval;
924 } 1278 }
925 err = do_lookup(nd, &this, &next); 1279 err = do_lookup(nd, &this, &next, &inode);
926 if (err) 1280 if (err)
927 break; 1281 break;
928 inode = next.dentry->d_inode;
929 if (follow_on_final(inode, lookup_flags)) { 1282 if (follow_on_final(inode, lookup_flags)) {
1283 if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
1284 return -ECHILD;
1285 BUG_ON(inode != next.dentry->d_inode);
930 err = do_follow_link(&next, nd); 1286 err = do_follow_link(&next, nd);
931 if (err) 1287 if (err)
932 goto return_err; 1288 goto return_err;
933 inode = nd->path.dentry->d_inode; 1289 nd->inode = nd->path.dentry->d_inode;
934 } else 1290 } else {
935 path_to_nameidata(&next, nd); 1291 path_to_nameidata(&next, nd);
1292 nd->inode = inode;
1293 }
936 err = -ENOENT; 1294 err = -ENOENT;
937 if (!inode) 1295 if (!nd->inode)
938 break; 1296 break;
939 if (lookup_flags & LOOKUP_DIRECTORY) { 1297 if (lookup_flags & LOOKUP_DIRECTORY) {
940 err = -ENOTDIR; 1298 err = -ENOTDIR;
941 if (!inode->i_op->lookup) 1299 if (!nd->inode->i_op->lookup)
942 break; 1300 break;
943 } 1301 }
944 goto return_base; 1302 goto return_base;
@@ -958,25 +1316,43 @@ return_reval:
958 * We bypassed the ordinary revalidation routines. 1316 * We bypassed the ordinary revalidation routines.
959 * We may need to check the cached dentry for staleness. 1317 * We may need to check the cached dentry for staleness.
960 */ 1318 */
961 if (nd->path.dentry && nd->path.dentry->d_sb && 1319 if (need_reval_dot(nd->path.dentry)) {
962 (nd->path.dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) {
963 err = -ESTALE;
964 /* Note: we do not d_invalidate() */ 1320 /* Note: we do not d_invalidate() */
965 if (!nd->path.dentry->d_op->d_revalidate( 1321 err = d_revalidate(nd->path.dentry, nd);
966 nd->path.dentry, nd)) 1322 if (!err)
1323 err = -ESTALE;
1324 if (err < 0)
967 break; 1325 break;
968 } 1326 }
969return_base: 1327return_base:
1328 if (nameidata_drop_rcu_last_maybe(nd))
1329 return -ECHILD;
970 return 0; 1330 return 0;
971out_dput: 1331out_dput:
972 path_put_conditional(&next, nd); 1332 if (!(nd->flags & LOOKUP_RCU))
1333 path_put_conditional(&next, nd);
973 break; 1334 break;
974 } 1335 }
975 path_put(&nd->path); 1336 if (!(nd->flags & LOOKUP_RCU))
1337 path_put(&nd->path);
976return_err: 1338return_err:
977 return err; 1339 return err;
978} 1340}
979 1341
1342static inline int path_walk_rcu(const char *name, struct nameidata *nd)
1343{
1344 current->total_link_count = 0;
1345
1346 return link_path_walk(name, nd);
1347}
1348
1349static inline int path_walk_simple(const char *name, struct nameidata *nd)
1350{
1351 current->total_link_count = 0;
1352
1353 return link_path_walk(name, nd);
1354}
1355
980static int path_walk(const char *name, struct nameidata *nd) 1356static int path_walk(const char *name, struct nameidata *nd)
981{ 1357{
982 struct path save = nd->path; 1358 struct path save = nd->path;
@@ -1002,6 +1378,93 @@ static int path_walk(const char *name, struct nameidata *nd)
1002 return result; 1378 return result;
1003} 1379}
1004 1380
1381static void path_finish_rcu(struct nameidata *nd)
1382{
1383 if (nd->flags & LOOKUP_RCU) {
1384 /* RCU dangling. Cancel it. */
1385 nd->flags &= ~LOOKUP_RCU;
1386 nd->root.mnt = NULL;
1387 rcu_read_unlock();
1388 br_read_unlock(vfsmount_lock);
1389 }
1390 if (nd->file)
1391 fput(nd->file);
1392}
1393
1394static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1395{
1396 int retval = 0;
1397 int fput_needed;
1398 struct file *file;
1399
1400 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1401 nd->flags = flags | LOOKUP_RCU;
1402 nd->depth = 0;
1403 nd->root.mnt = NULL;
1404 nd->file = NULL;
1405
1406 if (*name=='/') {
1407 struct fs_struct *fs = current->fs;
1408 unsigned seq;
1409
1410 br_read_lock(vfsmount_lock);
1411 rcu_read_lock();
1412
1413 do {
1414 seq = read_seqcount_begin(&fs->seq);
1415 nd->root = fs->root;
1416 nd->path = nd->root;
1417 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1418 } while (read_seqcount_retry(&fs->seq, seq));
1419
1420 } else if (dfd == AT_FDCWD) {
1421 struct fs_struct *fs = current->fs;
1422 unsigned seq;
1423
1424 br_read_lock(vfsmount_lock);
1425 rcu_read_lock();
1426
1427 do {
1428 seq = read_seqcount_begin(&fs->seq);
1429 nd->path = fs->pwd;
1430 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1431 } while (read_seqcount_retry(&fs->seq, seq));
1432
1433 } else {
1434 struct dentry *dentry;
1435
1436 file = fget_light(dfd, &fput_needed);
1437 retval = -EBADF;
1438 if (!file)
1439 goto out_fail;
1440
1441 dentry = file->f_path.dentry;
1442
1443 retval = -ENOTDIR;
1444 if (!S_ISDIR(dentry->d_inode->i_mode))
1445 goto fput_fail;
1446
1447 retval = file_permission(file, MAY_EXEC);
1448 if (retval)
1449 goto fput_fail;
1450
1451 nd->path = file->f_path;
1452 if (fput_needed)
1453 nd->file = file;
1454
1455 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1456 br_read_lock(vfsmount_lock);
1457 rcu_read_lock();
1458 }
1459 nd->inode = nd->path.dentry->d_inode;
1460 return 0;
1461
1462fput_fail:
1463 fput_light(file, fput_needed);
1464out_fail:
1465 return retval;
1466}
1467
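path_init_rcu() snapshots fs->root or fs->pwd by sampling fs->seq, copying the value, and retrying if the sequence count moved. The retry shape can be sketched with C11 atomics; this is only a user-space model of the loop, not the kernel's seqcount API (which also encodes "write in progress" in the low bit and relies on its own barriers):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Hypothetical versioned cell; 'value' stands in for fs->root / fs->pwd. */
    struct snap {
            atomic_uint version;
            atomic_int  value;
    };

    /* Retry until the version did not change across the copy. */
    static int read_snapshot(struct snap *s)
    {
            unsigned int v;
            int val;

            do {
                    v = atomic_load_explicit(&s->version, memory_order_acquire);
                    val = atomic_load_explicit(&s->value, memory_order_relaxed);
                    atomic_thread_fence(memory_order_acquire);
            } while (atomic_load_explicit(&s->version, memory_order_relaxed) != v);

            return val;
    }

    int main(void)
    {
            struct snap s;

            atomic_init(&s.version, 0);
            atomic_init(&s.value, 42);
            printf("%d\n", read_snapshot(&s));
            return 0;
    }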
1005static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1468static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1006{ 1469{
1007 int retval = 0; 1470 int retval = 0;
@@ -1042,6 +1505,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct namei
1042 1505
1043 fput_light(file, fput_needed); 1506 fput_light(file, fput_needed);
1044 } 1507 }
1508 nd->inode = nd->path.dentry->d_inode;
1045 return 0; 1509 return 0;
1046 1510
1047fput_fail: 1511fput_fail:
@@ -1054,16 +1518,53 @@ out_fail:
1054static int do_path_lookup(int dfd, const char *name, 1518static int do_path_lookup(int dfd, const char *name,
1055 unsigned int flags, struct nameidata *nd) 1519 unsigned int flags, struct nameidata *nd)
1056{ 1520{
1057 int retval = path_init(dfd, name, flags, nd); 1521 int retval;
1058 if (!retval) 1522
1059 retval = path_walk(name, nd); 1523 /*
1060 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1524 * Path walking is largely split up into 2 different synchronisation
1061 nd->path.dentry->d_inode)) 1525 * schemes, rcu-walk and ref-walk (explained in
1062 audit_inode(name, nd->path.dentry); 1526 * Documentation/filesystems/path-lookup.txt). These share much of the
1527 * path walk code, but some things, particularly setup, cleanup, and
1528 * following mounts, are sufficiently divergent that functions are
1529 * duplicated. Typically there is a function foo(), and its RCU
1530 * analogue, foo_rcu().
1531 *
1532 * -ECHILD is the error number of choice (just to avoid clashes) that
1533 * is returned if some aspect of an rcu-walk fails. Such an error must
1534 * be handled by restarting a traditional ref-walk (which will always
1535 * be able to complete).
1536 */
1537 retval = path_init_rcu(dfd, name, flags, nd);
1538 if (unlikely(retval))
1539 return retval;
1540 retval = path_walk_rcu(name, nd);
1541 path_finish_rcu(nd);
1063 if (nd->root.mnt) { 1542 if (nd->root.mnt) {
1064 path_put(&nd->root); 1543 path_put(&nd->root);
1065 nd->root.mnt = NULL; 1544 nd->root.mnt = NULL;
1066 } 1545 }
1546
1547 if (unlikely(retval == -ECHILD || retval == -ESTALE)) {
1548 /* slower, locked walk */
1549 if (retval == -ESTALE)
1550 flags |= LOOKUP_REVAL;
1551 retval = path_init(dfd, name, flags, nd);
1552 if (unlikely(retval))
1553 return retval;
1554 retval = path_walk(name, nd);
1555 if (nd->root.mnt) {
1556 path_put(&nd->root);
1557 nd->root.mnt = NULL;
1558 }
1559 }
1560
1561 if (likely(!retval)) {
1562 if (unlikely(!audit_dummy_context())) {
1563 if (nd->path.dentry && nd->inode)
1564 audit_inode(name, nd->path.dentry);
1565 }
1566 }
1567
1067 return retval; 1568 return retval;
1068} 1569}
1069 1570
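The comment in do_path_lookup() above describes the overall scheme: attempt the lockless rcu-walk first, and if it bails out with -ECHILD (or the path turned out stale with -ESTALE), restart the whole lookup with the reference-counted walk, which can always complete. A stripped-down sketch of that control flow follows, with hypothetical helper names in plain C rather than the kernel functions:

    #include <errno.h>
    #include <stdio.h>

    /* Fast, lockless attempt: may fail spuriously and ask for a restart. */
    static int walk_fast(const char *name)
    {
            (void)name;
            return -ECHILD;         /* pretend something changed under us */
    }

    /* Slow, reference-counted attempt: always able to complete. */
    static int walk_slow(const char *name, int reval)
    {
            printf("slow walk of %s%s\n", name, reval ? " (reval)" : "");
            return 0;
    }

    static int lookup(const char *name)
    {
            int err = walk_fast(name);

            if (err == -ECHILD || err == -ESTALE)
                    err = walk_slow(name, err == -ESTALE);
            return err;             /* -ECHILD itself never reaches callers */
    }

    int main(void)
    {
            return lookup("/tmp/example") ? 1 : 0;
    }

Keeping -ECHILD internal is the key design choice: the fast path is allowed to give up cheaply at any point because the slow path is always there to finish the job.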
@@ -1106,10 +1607,11 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1106 path_get(&nd->path); 1607 path_get(&nd->path);
1107 nd->root = nd->path; 1608 nd->root = nd->path;
1108 path_get(&nd->root); 1609 path_get(&nd->root);
1610 nd->inode = nd->path.dentry->d_inode;
1109 1611
1110 retval = path_walk(name, nd); 1612 retval = path_walk(name, nd);
1111 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1613 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1112 nd->path.dentry->d_inode)) 1614 nd->inode))
1113 audit_inode(name, nd->path.dentry); 1615 audit_inode(name, nd->path.dentry);
1114 1616
1115 path_put(&nd->root); 1617 path_put(&nd->root);
@@ -1125,7 +1627,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1125 struct dentry *dentry; 1627 struct dentry *dentry;
1126 int err; 1628 int err;
1127 1629
1128 err = exec_permission(inode); 1630 err = exec_permission(inode, 0);
1129 if (err) 1631 if (err)
1130 return ERR_PTR(err); 1632 return ERR_PTR(err);
1131 1633
@@ -1133,8 +1635,8 @@ static struct dentry *__lookup_hash(struct qstr *name,
1133 * See if the low-level filesystem might want 1635 * See if the low-level filesystem might want
1134 * to use its own hash.. 1636 * to use its own hash..
1135 */ 1637 */
1136 if (base->d_op && base->d_op->d_hash) { 1638 if (base->d_flags & DCACHE_OP_HASH) {
1137 err = base->d_op->d_hash(base, name); 1639 err = base->d_op->d_hash(base, inode, name);
1138 dentry = ERR_PTR(err); 1640 dentry = ERR_PTR(err);
1139 if (err < 0) 1641 if (err < 0)
1140 goto out; 1642 goto out;
@@ -1147,7 +1649,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1147 */ 1649 */
1148 dentry = d_lookup(base, name); 1650 dentry = d_lookup(base, name);
1149 1651
1150 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) 1652 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE))
1151 dentry = do_revalidate(dentry, nd); 1653 dentry = do_revalidate(dentry, nd);
1152 1654
1153 if (!dentry) 1655 if (!dentry)
@@ -1490,6 +1992,7 @@ out_unlock:
1490 mutex_unlock(&dir->d_inode->i_mutex); 1992 mutex_unlock(&dir->d_inode->i_mutex);
1491 dput(nd->path.dentry); 1993 dput(nd->path.dentry);
1492 nd->path.dentry = path->dentry; 1994 nd->path.dentry = path->dentry;
1995
1493 if (error) 1996 if (error)
1494 return error; 1997 return error;
1495 /* Don't check for write permission, don't truncate */ 1998 /* Don't check for write permission, don't truncate */
@@ -1584,6 +2087,9 @@ exit:
1584 return ERR_PTR(error); 2087 return ERR_PTR(error);
1585} 2088}
1586 2089
2090/*
2091 * Handle O_CREAT case for do_filp_open
2092 */
1587static struct file *do_last(struct nameidata *nd, struct path *path, 2093static struct file *do_last(struct nameidata *nd, struct path *path,
1588 int open_flag, int acc_mode, 2094 int open_flag, int acc_mode,
1589 int mode, const char *pathname) 2095 int mode, const char *pathname)
@@ -1597,50 +2103,25 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1597 follow_dotdot(nd); 2103 follow_dotdot(nd);
1598 dir = nd->path.dentry; 2104 dir = nd->path.dentry;
1599 case LAST_DOT: 2105 case LAST_DOT:
1600 if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) { 2106 if (need_reval_dot(dir)) {
1601 if (!dir->d_op->d_revalidate(dir, nd)) { 2107 error = d_revalidate(nd->path.dentry, nd);
2108 if (!error)
1602 error = -ESTALE; 2109 error = -ESTALE;
2110 if (error < 0)
1603 goto exit; 2111 goto exit;
1604 }
1605 } 2112 }
1606 /* fallthrough */ 2113 /* fallthrough */
1607 case LAST_ROOT: 2114 case LAST_ROOT:
1608 if (open_flag & O_CREAT) 2115 goto exit;
1609 goto exit;
1610 /* fallthrough */
1611 case LAST_BIND: 2116 case LAST_BIND:
1612 audit_inode(pathname, dir); 2117 audit_inode(pathname, dir);
1613 goto ok; 2118 goto ok;
1614 } 2119 }
1615 2120
1616 /* trailing slashes? */ 2121 /* trailing slashes? */
1617 if (nd->last.name[nd->last.len]) { 2122 if (nd->last.name[nd->last.len])
1618 if (open_flag & O_CREAT) 2123 goto exit;
1619 goto exit;
1620 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1621 }
1622
1623 /* just plain open? */
1624 if (!(open_flag & O_CREAT)) {
1625 error = do_lookup(nd, &nd->last, path);
1626 if (error)
1627 goto exit;
1628 error = -ENOENT;
1629 if (!path->dentry->d_inode)
1630 goto exit_dput;
1631 if (path->dentry->d_inode->i_op->follow_link)
1632 return NULL;
1633 error = -ENOTDIR;
1634 if (nd->flags & LOOKUP_DIRECTORY) {
1635 if (!path->dentry->d_inode->i_op->lookup)
1636 goto exit_dput;
1637 }
1638 path_to_nameidata(path, nd);
1639 audit_inode(pathname, nd->path.dentry);
1640 goto ok;
1641 }
1642 2124
1643 /* OK, it's O_CREAT */
1644 mutex_lock(&dir->d_inode->i_mutex); 2125 mutex_lock(&dir->d_inode->i_mutex);
1645 2126
1646 path->dentry = lookup_hash(nd); 2127 path->dentry = lookup_hash(nd);
@@ -1711,8 +2192,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1711 return NULL; 2192 return NULL;
1712 2193
1713 path_to_nameidata(path, nd); 2194 path_to_nameidata(path, nd);
2195 nd->inode = path->dentry->d_inode;
1714 error = -EISDIR; 2196 error = -EISDIR;
1715 if (S_ISDIR(path->dentry->d_inode->i_mode)) 2197 if (S_ISDIR(nd->inode->i_mode))
1716 goto exit; 2198 goto exit;
1717ok: 2199ok:
1718 filp = finish_open(nd, open_flag, acc_mode); 2200 filp = finish_open(nd, open_flag, acc_mode);
@@ -1743,11 +2225,14 @@ struct file *do_filp_open(int dfd, const char *pathname,
1743 struct path path; 2225 struct path path;
1744 int count = 0; 2226 int count = 0;
1745 int flag = open_to_namei_flags(open_flag); 2227 int flag = open_to_namei_flags(open_flag);
1746 int force_reval = 0; 2228 int flags;
1747 2229
1748 if (!(open_flag & O_CREAT)) 2230 if (!(open_flag & O_CREAT))
1749 mode = 0; 2231 mode = 0;
1750 2232
2233 /* Must never be set by userspace */
2234 open_flag &= ~FMODE_NONOTIFY;
2235
1751 /* 2236 /*
1752 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only 2237 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
1753 * check for O_DSYNC if the need any syncing at all we enforce it's 2238 * check for O_DSYNC if the need any syncing at all we enforce it's
@@ -1769,54 +2254,84 @@ struct file *do_filp_open(int dfd, const char *pathname,
1769 if (open_flag & O_APPEND) 2254 if (open_flag & O_APPEND)
1770 acc_mode |= MAY_APPEND; 2255 acc_mode |= MAY_APPEND;
1771 2256
1772 /* find the parent */ 2257 flags = LOOKUP_OPEN;
1773reval: 2258 if (open_flag & O_CREAT) {
1774 error = path_init(dfd, pathname, LOOKUP_PARENT, &nd); 2259 flags |= LOOKUP_CREATE;
2260 if (open_flag & O_EXCL)
2261 flags |= LOOKUP_EXCL;
2262 }
2263 if (open_flag & O_DIRECTORY)
2264 flags |= LOOKUP_DIRECTORY;
2265 if (!(open_flag & O_NOFOLLOW))
2266 flags |= LOOKUP_FOLLOW;
2267
2268 filp = get_empty_filp();
2269 if (!filp)
2270 return ERR_PTR(-ENFILE);
2271
2272 filp->f_flags = open_flag;
2273 nd.intent.open.file = filp;
2274 nd.intent.open.flags = flag;
2275 nd.intent.open.create_mode = mode;
2276
2277 if (open_flag & O_CREAT)
2278 goto creat;
2279
2280 /* !O_CREAT, simple open */
2281 error = do_path_lookup(dfd, pathname, flags, &nd);
2282 if (unlikely(error))
2283 goto out_filp;
2284 error = -ELOOP;
2285 if (!(nd.flags & LOOKUP_FOLLOW)) {
2286 if (nd.inode->i_op->follow_link)
2287 goto out_path;
2288 }
2289 error = -ENOTDIR;
2290 if (nd.flags & LOOKUP_DIRECTORY) {
2291 if (!nd.inode->i_op->lookup)
2292 goto out_path;
2293 }
2294 audit_inode(pathname, nd.path.dentry);
2295 filp = finish_open(&nd, open_flag, acc_mode);
2296 return filp;
2297
2298creat:
2299 /* OK, have to create the file. Find the parent. */
2300 error = path_init_rcu(dfd, pathname,
2301 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
1775 if (error) 2302 if (error)
1776 return ERR_PTR(error); 2303 goto out_filp;
1777 if (force_reval) 2304 error = path_walk_rcu(pathname, &nd);
1778 nd.flags |= LOOKUP_REVAL; 2305 path_finish_rcu(&nd);
2306 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2307 /* slower, locked walk */
2308 if (error == -ESTALE) {
2309reval:
2310 flags |= LOOKUP_REVAL;
2311 }
2312 error = path_init(dfd, pathname,
2313 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2314 if (error)
2315 goto out_filp;
1779 2316
1780 current->total_link_count = 0; 2317 error = path_walk_simple(pathname, &nd);
1781 error = link_path_walk(pathname, &nd);
1782 if (error) {
1783 filp = ERR_PTR(error);
1784 goto out;
1785 } 2318 }
1786 if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT)) 2319 if (unlikely(error))
2320 goto out_filp;
2321 if (unlikely(!audit_dummy_context()))
1787 audit_inode(pathname, nd.path.dentry); 2322 audit_inode(pathname, nd.path.dentry);
1788 2323
1789 /* 2324 /*
1790 * We have the parent and last component. 2325 * We have the parent and last component.
1791 */ 2326 */
1792 2327 nd.flags = flags;
1793 error = -ENFILE;
1794 filp = get_empty_filp();
1795 if (filp == NULL)
1796 goto exit_parent;
1797 nd.intent.open.file = filp;
1798 filp->f_flags = open_flag;
1799 nd.intent.open.flags = flag;
1800 nd.intent.open.create_mode = mode;
1801 nd.flags &= ~LOOKUP_PARENT;
1802 nd.flags |= LOOKUP_OPEN;
1803 if (open_flag & O_CREAT) {
1804 nd.flags |= LOOKUP_CREATE;
1805 if (open_flag & O_EXCL)
1806 nd.flags |= LOOKUP_EXCL;
1807 }
1808 if (open_flag & O_DIRECTORY)
1809 nd.flags |= LOOKUP_DIRECTORY;
1810 if (!(open_flag & O_NOFOLLOW))
1811 nd.flags |= LOOKUP_FOLLOW;
1812 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2328 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1813 while (unlikely(!filp)) { /* trailing symlink */ 2329 while (unlikely(!filp)) { /* trailing symlink */
1814 struct path holder; 2330 struct path holder;
1815 struct inode *inode = path.dentry->d_inode;
1816 void *cookie; 2331 void *cookie;
1817 error = -ELOOP; 2332 error = -ELOOP;
1818 /* S_ISDIR part is a temporary automount kludge */ 2333 /* S_ISDIR part is a temporary automount kludge */
1819 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode)) 2334 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(nd.inode->i_mode))
1820 goto exit_dput; 2335 goto exit_dput;
1821 if (count++ == 32) 2336 if (count++ == 32)
1822 goto exit_dput; 2337 goto exit_dput;
@@ -1837,36 +2352,33 @@ reval:
1837 goto exit_dput; 2352 goto exit_dput;
1838 error = __do_follow_link(&path, &nd, &cookie); 2353 error = __do_follow_link(&path, &nd, &cookie);
1839 if (unlikely(error)) { 2354 if (unlikely(error)) {
2355 if (!IS_ERR(cookie) && nd.inode->i_op->put_link)
2356 nd.inode->i_op->put_link(path.dentry, &nd, cookie);
1840 /* nd.path had been dropped */ 2357 /* nd.path had been dropped */
1841 if (!IS_ERR(cookie) && inode->i_op->put_link) 2358 nd.path = path;
1842 inode->i_op->put_link(path.dentry, &nd, cookie); 2359 goto out_path;
1843 path_put(&path);
1844 release_open_intent(&nd);
1845 filp = ERR_PTR(error);
1846 goto out;
1847 } 2360 }
1848 holder = path; 2361 holder = path;
1849 nd.flags &= ~LOOKUP_PARENT; 2362 nd.flags &= ~LOOKUP_PARENT;
1850 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2363 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1851 if (inode->i_op->put_link) 2364 if (nd.inode->i_op->put_link)
1852 inode->i_op->put_link(holder.dentry, &nd, cookie); 2365 nd.inode->i_op->put_link(holder.dentry, &nd, cookie);
1853 path_put(&holder); 2366 path_put(&holder);
1854 } 2367 }
1855out: 2368out:
1856 if (nd.root.mnt) 2369 if (nd.root.mnt)
1857 path_put(&nd.root); 2370 path_put(&nd.root);
1858 if (filp == ERR_PTR(-ESTALE) && !force_reval) { 2371 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
1859 force_reval = 1;
1860 goto reval; 2372 goto reval;
1861 }
1862 return filp; 2373 return filp;
1863 2374
1864exit_dput: 2375exit_dput:
1865 path_put_conditional(&path, &nd); 2376 path_put_conditional(&path, &nd);
2377out_path:
2378 path_put(&nd.path);
2379out_filp:
1866 if (!IS_ERR(nd.intent.open.file)) 2380 if (!IS_ERR(nd.intent.open.file))
1867 release_open_intent(&nd); 2381 release_open_intent(&nd);
1868exit_parent:
1869 path_put(&nd.path);
1870 filp = ERR_PTR(error); 2382 filp = ERR_PTR(error);
1871 goto out; 2383 goto out;
1872} 2384}
@@ -2127,12 +2639,10 @@ void dentry_unhash(struct dentry *dentry)
2127{ 2639{
2128 dget(dentry); 2640 dget(dentry);
2129 shrink_dcache_parent(dentry); 2641 shrink_dcache_parent(dentry);
2130 spin_lock(&dcache_lock);
2131 spin_lock(&dentry->d_lock); 2642 spin_lock(&dentry->d_lock);
2132 if (atomic_read(&dentry->d_count) == 2) 2643 if (dentry->d_count == 2)
2133 __d_drop(dentry); 2644 __d_drop(dentry);
2134 spin_unlock(&dentry->d_lock); 2645 spin_unlock(&dentry->d_lock);
2135 spin_unlock(&dcache_lock);
2136} 2646}
2137 2647
2138int vfs_rmdir(struct inode *dir, struct dentry *dentry) 2648int vfs_rmdir(struct inode *dir, struct dentry *dentry)
diff --git a/fs/namespace.c b/fs/namespace.c
index 8a415c9c5e55..3ddfd9046c44 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -13,7 +13,6 @@
13#include <linux/sched.h> 13#include <linux/sched.h>
14#include <linux/spinlock.h> 14#include <linux/spinlock.h>
15#include <linux/percpu.h> 15#include <linux/percpu.h>
16#include <linux/smp_lock.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/kernel.h> 17#include <linux/kernel.h>
19#include <linux/acct.h> 18#include <linux/acct.h>
@@ -139,6 +138,64 @@ void mnt_release_group_id(struct vfsmount *mnt)
139 mnt->mnt_group_id = 0; 138 mnt->mnt_group_id = 0;
140} 139}
141 140
141/*
142 * vfsmount lock must be held for read
143 */
144static inline void mnt_add_count(struct vfsmount *mnt, int n)
145{
146#ifdef CONFIG_SMP
147 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
148#else
149 preempt_disable();
150 mnt->mnt_count += n;
151 preempt_enable();
152#endif
153}
154
155static inline void mnt_set_count(struct vfsmount *mnt, int n)
156{
157#ifdef CONFIG_SMP
158 this_cpu_write(mnt->mnt_pcp->mnt_count, n);
159#else
160 mnt->mnt_count = n;
161#endif
162}
163
164/*
165 * vfsmount lock must be held for read
166 */
167static inline void mnt_inc_count(struct vfsmount *mnt)
168{
169 mnt_add_count(mnt, 1);
170}
171
172/*
173 * vfsmount lock must be held for read
174 */
175static inline void mnt_dec_count(struct vfsmount *mnt)
176{
177 mnt_add_count(mnt, -1);
178}
179
180/*
181 * vfsmount lock must be held for write
182 */
183unsigned int mnt_get_count(struct vfsmount *mnt)
184{
185#ifdef CONFIG_SMP
186 unsigned int count = atomic_read(&mnt->mnt_longrefs);
187 int cpu;
188
189 for_each_possible_cpu(cpu) {
190 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
191 }
192
193 return count;
194#else
195 return mnt->mnt_count;
196#endif
197}
198
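mnt_add_count() above updates a per-CPU slot with only the vfsmount lock held for read, while mnt_get_count() needs the write side so it can sum the slots against a stable set of updaters. A rough user-space approximation uses per-thread slots and a read-write lock; this is only a model (not the kernel's brlock or percpu API), and it assumes each slot is only ever updated by its owning thread:

    #include <pthread.h>
    #include <stdio.h>

    #define NSLOTS 4        /* stands in for the number of possible CPUs */

    static pthread_rwlock_t vfsmount_lock = PTHREAD_RWLOCK_INITIALIZER;
    static int mnt_count[NSLOTS];

    /* Fast path: bump this "CPU"'s private slot under the read lock. */
    static void mnt_add_count(int slot, int n)
    {
            pthread_rwlock_rdlock(&vfsmount_lock);
            mnt_count[slot] += n;
            pthread_rwlock_unlock(&vfsmount_lock);
    }

    /* Slow path: the write lock excludes updaters, so the sum is stable. */
    static int mnt_get_count(void)
    {
            int sum = 0;

            pthread_rwlock_wrlock(&vfsmount_lock);
            for (int i = 0; i < NSLOTS; i++)
                    sum += mnt_count[i];
            pthread_rwlock_unlock(&vfsmount_lock);
            return sum;
    }

    int main(void)
    {
            mnt_add_count(0, 1);
            mnt_add_count(3, 1);
            printf("count=%d\n", mnt_get_count());  /* prints count=2 */
            return 0;
    }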
142struct vfsmount *alloc_vfsmnt(const char *name) 199struct vfsmount *alloc_vfsmnt(const char *name)
143{ 200{
144 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
@@ -155,7 +212,17 @@ struct vfsmount *alloc_vfsmnt(const char *name)
155 goto out_free_id; 212 goto out_free_id;
156 } 213 }
157 214
158 atomic_set(&mnt->mnt_count, 1); 215#ifdef CONFIG_SMP
216 mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
217 if (!mnt->mnt_pcp)
218 goto out_free_devname;
219
220 atomic_set(&mnt->mnt_longrefs, 1);
221#else
222 mnt->mnt_count = 1;
223 mnt->mnt_writers = 0;
224#endif
225
159 INIT_LIST_HEAD(&mnt->mnt_hash); 226 INIT_LIST_HEAD(&mnt->mnt_hash);
160 INIT_LIST_HEAD(&mnt->mnt_child); 227 INIT_LIST_HEAD(&mnt->mnt_child);
161 INIT_LIST_HEAD(&mnt->mnt_mounts); 228 INIT_LIST_HEAD(&mnt->mnt_mounts);
@@ -167,13 +234,6 @@ struct vfsmount *alloc_vfsmnt(const char *name)
167#ifdef CONFIG_FSNOTIFY 234#ifdef CONFIG_FSNOTIFY
168 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); 235 INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
169#endif 236#endif
170#ifdef CONFIG_SMP
171 mnt->mnt_writers = alloc_percpu(int);
172 if (!mnt->mnt_writers)
173 goto out_free_devname;
174#else
175 mnt->mnt_writers = 0;
176#endif
177 } 237 }
178 return mnt; 238 return mnt;
179 239
@@ -217,32 +277,32 @@ int __mnt_is_readonly(struct vfsmount *mnt)
217} 277}
218EXPORT_SYMBOL_GPL(__mnt_is_readonly); 278EXPORT_SYMBOL_GPL(__mnt_is_readonly);
219 279
220static inline void inc_mnt_writers(struct vfsmount *mnt) 280static inline void mnt_inc_writers(struct vfsmount *mnt)
221{ 281{
222#ifdef CONFIG_SMP 282#ifdef CONFIG_SMP
223 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++; 283 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
224#else 284#else
225 mnt->mnt_writers++; 285 mnt->mnt_writers++;
226#endif 286#endif
227} 287}
228 288
229static inline void dec_mnt_writers(struct vfsmount *mnt) 289static inline void mnt_dec_writers(struct vfsmount *mnt)
230{ 290{
231#ifdef CONFIG_SMP 291#ifdef CONFIG_SMP
232 (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--; 292 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
233#else 293#else
234 mnt->mnt_writers--; 294 mnt->mnt_writers--;
235#endif 295#endif
236} 296}
237 297
238static unsigned int count_mnt_writers(struct vfsmount *mnt) 298static unsigned int mnt_get_writers(struct vfsmount *mnt)
239{ 299{
240#ifdef CONFIG_SMP 300#ifdef CONFIG_SMP
241 unsigned int count = 0; 301 unsigned int count = 0;
242 int cpu; 302 int cpu;
243 303
244 for_each_possible_cpu(cpu) { 304 for_each_possible_cpu(cpu) {
245 count += *per_cpu_ptr(mnt->mnt_writers, cpu); 305 count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
246 } 306 }
247 307
248 return count; 308 return count;
@@ -274,9 +334,9 @@ int mnt_want_write(struct vfsmount *mnt)
274 int ret = 0; 334 int ret = 0;
275 335
276 preempt_disable(); 336 preempt_disable();
277 inc_mnt_writers(mnt); 337 mnt_inc_writers(mnt);
278 /* 338 /*
279 * The store to inc_mnt_writers must be visible before we pass 339 * The store to mnt_inc_writers must be visible before we pass
280 * MNT_WRITE_HOLD loop below, so that the slowpath can see our 340 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
281 * incremented count after it has set MNT_WRITE_HOLD. 341 * incremented count after it has set MNT_WRITE_HOLD.
282 */ 342 */
@@ -290,7 +350,7 @@ int mnt_want_write(struct vfsmount *mnt)
290 */ 350 */
291 smp_rmb(); 351 smp_rmb();
292 if (__mnt_is_readonly(mnt)) { 352 if (__mnt_is_readonly(mnt)) {
293 dec_mnt_writers(mnt); 353 mnt_dec_writers(mnt);
294 ret = -EROFS; 354 ret = -EROFS;
295 goto out; 355 goto out;
296 } 356 }
@@ -318,7 +378,7 @@ int mnt_clone_write(struct vfsmount *mnt)
318 if (__mnt_is_readonly(mnt)) 378 if (__mnt_is_readonly(mnt))
319 return -EROFS; 379 return -EROFS;
320 preempt_disable(); 380 preempt_disable();
321 inc_mnt_writers(mnt); 381 mnt_inc_writers(mnt);
322 preempt_enable(); 382 preempt_enable();
323 return 0; 383 return 0;
324} 384}
@@ -352,7 +412,7 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file);
352void mnt_drop_write(struct vfsmount *mnt) 412void mnt_drop_write(struct vfsmount *mnt)
353{ 413{
354 preempt_disable(); 414 preempt_disable();
355 dec_mnt_writers(mnt); 415 mnt_dec_writers(mnt);
356 preempt_enable(); 416 preempt_enable();
357} 417}
358EXPORT_SYMBOL_GPL(mnt_drop_write); 418EXPORT_SYMBOL_GPL(mnt_drop_write);
@@ -385,7 +445,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
385 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while 445 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
386 * we're counting up here. 446 * we're counting up here.
387 */ 447 */
388 if (count_mnt_writers(mnt) > 0) 448 if (mnt_get_writers(mnt) > 0)
389 ret = -EBUSY; 449 ret = -EBUSY;
390 else 450 else
391 mnt->mnt_flags |= MNT_READONLY; 451 mnt->mnt_flags |= MNT_READONLY;
@@ -419,7 +479,7 @@ void free_vfsmnt(struct vfsmount *mnt)
419 kfree(mnt->mnt_devname); 479 kfree(mnt->mnt_devname);
420 mnt_free_id(mnt); 480 mnt_free_id(mnt);
421#ifdef CONFIG_SMP 481#ifdef CONFIG_SMP
422 free_percpu(mnt->mnt_writers); 482 free_percpu(mnt->mnt_pcp);
423#endif 483#endif
424 kmem_cache_free(mnt_cache, mnt); 484 kmem_cache_free(mnt_cache, mnt);
425} 485}
@@ -493,6 +553,27 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
493} 553}
494 554
495/* 555/*
556 * Clear dentry's mounted state if it has no remaining mounts.
557 * vfsmount_lock must be held for write.
558 */
559static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
560{
561 unsigned u;
562
563 for (u = 0; u < HASH_SIZE; u++) {
564 struct vfsmount *p;
565
566 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
567 if (p->mnt_mountpoint == dentry)
568 return;
569 }
570 }
571 spin_lock(&dentry->d_lock);
572 dentry->d_flags &= ~DCACHE_MOUNTED;
573 spin_unlock(&dentry->d_lock);
574}
575
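dentry_reset_mounted() only clears DCACHE_MOUNTED once a scan of the whole mount hash finds no other mount still sitting on that dentry. The same "clear the flag only when the last user is gone" scan, reduced to a flat table with hypothetical types (not the kernel structures), looks like this:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct dentry { bool mounted; };

    struct mount { struct dentry *mountpoint; };

    /* Clear the flag only if no remaining mount uses @d as its mountpoint. */
    static void dentry_reset_mounted(struct mount *tbl, size_t n, struct dentry *d)
    {
            for (size_t i = 0; i < n; i++)
                    if (tbl[i].mountpoint == d)
                            return;         /* still mounted on, keep the flag */
            d->mounted = false;
    }

    int main(void)
    {
            struct dentry d = { true };
            struct mount tbl[] = { { NULL } };      /* nothing references d */

            dentry_reset_mounted(tbl, 1, &d);
            printf("mounted=%d\n", d.mounted);      /* prints mounted=0 */
            return 0;
    }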
576/*
496 * vfsmount lock must be held for write 577 * vfsmount lock must be held for write
497 */ 578 */
498static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 579static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
@@ -503,7 +584,7 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
503 mnt->mnt_mountpoint = mnt->mnt_root; 584 mnt->mnt_mountpoint = mnt->mnt_root;
504 list_del_init(&mnt->mnt_child); 585 list_del_init(&mnt->mnt_child);
505 list_del_init(&mnt->mnt_hash); 586 list_del_init(&mnt->mnt_hash);
506 old_path->dentry->d_mounted--; 587 dentry_reset_mounted(old_path->mnt, old_path->dentry);
507} 588}
508 589
509/* 590/*
@@ -514,7 +595,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
514{ 595{
515 child_mnt->mnt_parent = mntget(mnt); 596 child_mnt->mnt_parent = mntget(mnt);
516 child_mnt->mnt_mountpoint = dget(dentry); 597 child_mnt->mnt_mountpoint = dget(dentry);
517 dentry->d_mounted++; 598 spin_lock(&dentry->d_lock);
599 dentry->d_flags |= DCACHE_MOUNTED;
600 spin_unlock(&dentry->d_lock);
518} 601}
519 602
520/* 603/*
@@ -630,9 +713,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
630 return NULL; 713 return NULL;
631} 714}
632 715
633static inline void __mntput(struct vfsmount *mnt) 716static inline void mntfree(struct vfsmount *mnt)
634{ 717{
635 struct super_block *sb = mnt->mnt_sb; 718 struct super_block *sb = mnt->mnt_sb;
719
636 /* 720 /*
637 * This probably indicates that somebody messed 721 * This probably indicates that somebody messed
638 * up a mnt_want/drop_write() pair. If this 722 * up a mnt_want/drop_write() pair. If this
@@ -640,38 +724,123 @@ static inline void __mntput(struct vfsmount *mnt)
640 * to make r/w->r/o transitions. 724 * to make r/w->r/o transitions.
641 */ 725 */
642 /* 726 /*
643 * atomic_dec_and_lock() used to deal with ->mnt_count decrements 727 * The locking used to deal with mnt_count decrement provides barriers,
644 * provides barriers, so count_mnt_writers() below is safe. AV 728 * so mnt_get_writers() below is safe.
645 */ 729 */
646 WARN_ON(count_mnt_writers(mnt)); 730 WARN_ON(mnt_get_writers(mnt));
647 fsnotify_vfsmount_delete(mnt); 731 fsnotify_vfsmount_delete(mnt);
648 dput(mnt->mnt_root); 732 dput(mnt->mnt_root);
649 free_vfsmnt(mnt); 733 free_vfsmnt(mnt);
650 deactivate_super(sb); 734 deactivate_super(sb);
651} 735}
652 736
653void mntput_no_expire(struct vfsmount *mnt) 737#ifdef CONFIG_SMP
654{ 738static inline void __mntput(struct vfsmount *mnt, int longrefs)
655repeat: 739{
656 if (atomic_add_unless(&mnt->mnt_count, -1, 1)) 740 if (!longrefs) {
657 return; 741put_again:
742 br_read_lock(vfsmount_lock);
743 if (likely(atomic_read(&mnt->mnt_longrefs))) {
744 mnt_dec_count(mnt);
745 br_read_unlock(vfsmount_lock);
746 return;
747 }
748 br_read_unlock(vfsmount_lock);
749 } else {
750 BUG_ON(!atomic_read(&mnt->mnt_longrefs));
751 if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
752 return;
753 }
754
658 br_write_lock(vfsmount_lock); 755 br_write_lock(vfsmount_lock);
659 if (!atomic_dec_and_test(&mnt->mnt_count)) { 756 if (!longrefs)
757 mnt_dec_count(mnt);
758 else
759 atomic_dec(&mnt->mnt_longrefs);
760 if (mnt_get_count(mnt)) {
660 br_write_unlock(vfsmount_lock); 761 br_write_unlock(vfsmount_lock);
661 return; 762 return;
662 } 763 }
663 if (likely(!mnt->mnt_pinned)) { 764 if (unlikely(mnt->mnt_pinned)) {
765 mnt_add_count(mnt, mnt->mnt_pinned + 1);
766 mnt->mnt_pinned = 0;
664 br_write_unlock(vfsmount_lock); 767 br_write_unlock(vfsmount_lock);
665 __mntput(mnt); 768 acct_auto_close_mnt(mnt);
769 goto put_again;
770 }
771 br_write_unlock(vfsmount_lock);
772 mntfree(mnt);
773}
774#else
775static inline void __mntput(struct vfsmount *mnt, int longrefs)
776{
777put_again:
778 mnt_dec_count(mnt);
779 if (likely(mnt_get_count(mnt)))
666 return; 780 return;
781 br_write_lock(vfsmount_lock);
782 if (unlikely(mnt->mnt_pinned)) {
783 mnt_add_count(mnt, mnt->mnt_pinned + 1);
784 mnt->mnt_pinned = 0;
785 br_write_unlock(vfsmount_lock);
786 acct_auto_close_mnt(mnt);
787 goto put_again;
667 } 788 }
668 atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
669 mnt->mnt_pinned = 0;
670 br_write_unlock(vfsmount_lock); 789 br_write_unlock(vfsmount_lock);
671 acct_auto_close_mnt(mnt); 790 mntfree(mnt);
672 goto repeat; 791}
792#endif
793
794static void mntput_no_expire(struct vfsmount *mnt)
795{
796 __mntput(mnt, 0);
797}
798
799void mntput(struct vfsmount *mnt)
800{
801 if (mnt) {
802 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
803 if (unlikely(mnt->mnt_expiry_mark))
804 mnt->mnt_expiry_mark = 0;
805 __mntput(mnt, 0);
806 }
807}
808EXPORT_SYMBOL(mntput);
809
810struct vfsmount *mntget(struct vfsmount *mnt)
811{
812 if (mnt)
813 mnt_inc_count(mnt);
814 return mnt;
673} 815}
674EXPORT_SYMBOL(mntput_no_expire); 816EXPORT_SYMBOL(mntget);
817
818void mntput_long(struct vfsmount *mnt)
819{
820#ifdef CONFIG_SMP
821 if (mnt) {
822 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
823 if (unlikely(mnt->mnt_expiry_mark))
824 mnt->mnt_expiry_mark = 0;
825 __mntput(mnt, 1);
826 }
827#else
828 mntput(mnt);
829#endif
830}
831EXPORT_SYMBOL(mntput_long);
832
833struct vfsmount *mntget_long(struct vfsmount *mnt)
834{
835#ifdef CONFIG_SMP
836 if (mnt)
837 atomic_inc(&mnt->mnt_longrefs);
838 return mnt;
839#else
840 return mntget(mnt);
841#endif
842}
843EXPORT_SYMBOL(mntget_long);
675 844
676void mnt_pin(struct vfsmount *mnt) 845void mnt_pin(struct vfsmount *mnt)
677{ 846{
@@ -679,19 +848,17 @@ void mnt_pin(struct vfsmount *mnt)
679 mnt->mnt_pinned++; 848 mnt->mnt_pinned++;
680 br_write_unlock(vfsmount_lock); 849 br_write_unlock(vfsmount_lock);
681} 850}
682
683EXPORT_SYMBOL(mnt_pin); 851EXPORT_SYMBOL(mnt_pin);
684 852
685void mnt_unpin(struct vfsmount *mnt) 853void mnt_unpin(struct vfsmount *mnt)
686{ 854{
687 br_write_lock(vfsmount_lock); 855 br_write_lock(vfsmount_lock);
688 if (mnt->mnt_pinned) { 856 if (mnt->mnt_pinned) {
689 atomic_inc(&mnt->mnt_count); 857 mnt_inc_count(mnt);
690 mnt->mnt_pinned--; 858 mnt->mnt_pinned--;
691 } 859 }
692 br_write_unlock(vfsmount_lock); 860 br_write_unlock(vfsmount_lock);
693} 861}
694
695EXPORT_SYMBOL(mnt_unpin); 862EXPORT_SYMBOL(mnt_unpin);
696 863
697static inline void mangle(struct seq_file *m, const char *s) 864static inline void mangle(struct seq_file *m, const char *s)
@@ -986,12 +1153,13 @@ int may_umount_tree(struct vfsmount *mnt)
986 int minimum_refs = 0; 1153 int minimum_refs = 0;
987 struct vfsmount *p; 1154 struct vfsmount *p;
988 1155
989 br_read_lock(vfsmount_lock); 1156 /* write lock needed for mnt_get_count */
1157 br_write_lock(vfsmount_lock);
990 for (p = mnt; p; p = next_mnt(p, mnt)) { 1158 for (p = mnt; p; p = next_mnt(p, mnt)) {
991 actual_refs += atomic_read(&p->mnt_count); 1159 actual_refs += mnt_get_count(p);
992 minimum_refs += 2; 1160 minimum_refs += 2;
993 } 1161 }
994 br_read_unlock(vfsmount_lock); 1162 br_write_unlock(vfsmount_lock);
995 1163
996 if (actual_refs > minimum_refs) 1164 if (actual_refs > minimum_refs)
997 return 0; 1165 return 0;
@@ -1018,10 +1186,10 @@ int may_umount(struct vfsmount *mnt)
1018{ 1186{
1019 int ret = 1; 1187 int ret = 1;
1020 down_read(&namespace_sem); 1188 down_read(&namespace_sem);
1021 br_read_lock(vfsmount_lock); 1189 br_write_lock(vfsmount_lock);
1022 if (propagate_mount_busy(mnt, 2)) 1190 if (propagate_mount_busy(mnt, 2))
1023 ret = 0; 1191 ret = 0;
1024 br_read_unlock(vfsmount_lock); 1192 br_write_unlock(vfsmount_lock);
1025 up_read(&namespace_sem); 1193 up_read(&namespace_sem);
1026 return ret; 1194 return ret;
1027} 1195}
@@ -1048,7 +1216,7 @@ void release_mounts(struct list_head *head)
1048 dput(dentry); 1216 dput(dentry);
1049 mntput(m); 1217 mntput(m);
1050 } 1218 }
1051 mntput(mnt); 1219 mntput_long(mnt);
1052 } 1220 }
1053} 1221}
1054 1222
@@ -1074,7 +1242,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1074 list_del_init(&p->mnt_child); 1242 list_del_init(&p->mnt_child);
1075 if (p->mnt_parent != p) { 1243 if (p->mnt_parent != p) {
1076 p->mnt_parent->mnt_ghosts++; 1244 p->mnt_parent->mnt_ghosts++;
1077 p->mnt_mountpoint->d_mounted--; 1245 dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint);
1078 } 1246 }
1079 change_mnt_propagation(p, MS_PRIVATE); 1247 change_mnt_propagation(p, MS_PRIVATE);
1080 } 1248 }
@@ -1103,8 +1271,16 @@ static int do_umount(struct vfsmount *mnt, int flags)
1103 flags & (MNT_FORCE | MNT_DETACH)) 1271 flags & (MNT_FORCE | MNT_DETACH))
1104 return -EINVAL; 1272 return -EINVAL;
1105 1273
1106 if (atomic_read(&mnt->mnt_count) != 2) 1274 /*
1275 * probably don't strictly need the lock here if we examined
1276 * all race cases, but it's a slowpath.
1277 */
1278 br_write_lock(vfsmount_lock);
1279 if (mnt_get_count(mnt) != 2) {
 1280 br_write_unlock(vfsmount_lock);
1107 return -EBUSY; 1281 return -EBUSY;
1282 }
1283 br_write_unlock(vfsmount_lock);
1108 1284
1109 if (!xchg(&mnt->mnt_expiry_mark, 1)) 1285 if (!xchg(&mnt->mnt_expiry_mark, 1))
1110 return -EAGAIN; 1286 return -EAGAIN;
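/*
 * Illustrative aside, not from the patch: the xchg() above is the
 * two-pass expiry handshake.  The first attempt only plants
 * mnt_expiry_mark and reports -EAGAIN; mntput() clears the mark on any
 * intervening use (see the mntput() hunk above), so a later attempt only
 * proceeds if the mount stayed idle.  Minimal sketch of the idiom on a
 * hypothetical flag:
 */
static int example_two_pass_expire(unsigned long *mark)
{
	if (!xchg(mark, 1))
		return -EAGAIN;	/* first pass: just set the mark */
	return 0;		/* mark survived untouched: go ahead */
}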
@@ -1793,7 +1969,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
1793 1969
1794unlock: 1970unlock:
1795 up_write(&namespace_sem); 1971 up_write(&namespace_sem);
1796 mntput(newmnt); 1972 mntput_long(newmnt);
1797 return err; 1973 return err;
1798} 1974}
1799 1975
@@ -2126,11 +2302,11 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2126 if (fs) { 2302 if (fs) {
2127 if (p == fs->root.mnt) { 2303 if (p == fs->root.mnt) {
2128 rootmnt = p; 2304 rootmnt = p;
2129 fs->root.mnt = mntget(q); 2305 fs->root.mnt = mntget_long(q);
2130 } 2306 }
2131 if (p == fs->pwd.mnt) { 2307 if (p == fs->pwd.mnt) {
2132 pwdmnt = p; 2308 pwdmnt = p;
2133 fs->pwd.mnt = mntget(q); 2309 fs->pwd.mnt = mntget_long(q);
2134 } 2310 }
2135 } 2311 }
2136 p = next_mnt(p, mnt_ns->root); 2312 p = next_mnt(p, mnt_ns->root);
@@ -2139,9 +2315,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2139 up_write(&namespace_sem); 2315 up_write(&namespace_sem);
2140 2316
2141 if (rootmnt) 2317 if (rootmnt)
2142 mntput(rootmnt); 2318 mntput_long(rootmnt);
2143 if (pwdmnt) 2319 if (pwdmnt)
2144 mntput(pwdmnt); 2320 mntput_long(pwdmnt);
2145 2321
2146 return new_ns; 2322 return new_ns;
2147} 2323}
@@ -2328,6 +2504,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2328 touch_mnt_namespace(current->nsproxy->mnt_ns); 2504 touch_mnt_namespace(current->nsproxy->mnt_ns);
2329 br_write_unlock(vfsmount_lock); 2505 br_write_unlock(vfsmount_lock);
2330 chroot_fs_refs(&root, &new); 2506 chroot_fs_refs(&root, &new);
2507
2331 error = 0; 2508 error = 0;
2332 path_put(&root_parent); 2509 path_put(&root_parent);
2333 path_put(&parent_path); 2510 path_put(&parent_path);
@@ -2354,6 +2531,7 @@ static void __init init_mount_tree(void)
2354 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); 2531 mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
2355 if (IS_ERR(mnt)) 2532 if (IS_ERR(mnt))
2356 panic("Can't create rootfs"); 2533 panic("Can't create rootfs");
2534
2357 ns = create_mnt_ns(mnt); 2535 ns = create_mnt_ns(mnt);
2358 if (IS_ERR(ns)) 2536 if (IS_ERR(ns))
2359 panic("Can't allocate initial namespace"); 2537 panic("Can't allocate initial namespace");
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aac8832e919e..28f136d4aaec 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -17,9 +17,9 @@
17#include <linux/kernel.h> 17#include <linux/kernel.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/namei.h>
20#include <asm/uaccess.h> 21#include <asm/uaccess.h>
21#include <asm/byteorder.h> 22#include <asm/byteorder.h>
22#include <linux/smp_lock.h>
23 23
24#include <linux/ncp_fs.h> 24#include <linux/ncp_fs.h>
25 25
@@ -75,9 +75,12 @@ const struct inode_operations ncp_dir_inode_operations =
75 * Dentry operations routines 75 * Dentry operations routines
76 */ 76 */
77static int ncp_lookup_validate(struct dentry *, struct nameidata *); 77static int ncp_lookup_validate(struct dentry *, struct nameidata *);
78static int ncp_hash_dentry(struct dentry *, struct qstr *); 78static int ncp_hash_dentry(const struct dentry *, const struct inode *,
79static int ncp_compare_dentry (struct dentry *, struct qstr *, struct qstr *); 79 struct qstr *);
80static int ncp_delete_dentry(struct dentry *); 80static int ncp_compare_dentry(const struct dentry *, const struct inode *,
81 const struct dentry *, const struct inode *,
82 unsigned int, const char *, const struct qstr *);
83static int ncp_delete_dentry(const struct dentry *);
81 84
82static const struct dentry_operations ncp_dentry_operations = 85static const struct dentry_operations ncp_dentry_operations =
83{ 86{
@@ -114,10 +117,10 @@ static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator)
114 117
115#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) 118#define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS)
116 119
117static inline int ncp_case_sensitive(struct dentry *dentry) 120static inline int ncp_case_sensitive(const struct inode *i)
118{ 121{
119#ifdef CONFIG_NCPFS_NFS_NS 122#ifdef CONFIG_NCPFS_NFS_NS
120 return ncp_namespace(dentry->d_inode) == NW_NS_NFS; 123 return ncp_namespace(i) == NW_NS_NFS;
121#else 124#else
122 return 0; 125 return 0;
123#endif /* CONFIG_NCPFS_NFS_NS */ 126#endif /* CONFIG_NCPFS_NFS_NS */
@@ -128,14 +131,16 @@ static inline int ncp_case_sensitive(struct dentry *dentry)
128 * is case-sensitive. 131 * is case-sensitive.
129 */ 132 */
130static int 133static int
131ncp_hash_dentry(struct dentry *dentry, struct qstr *this) 134ncp_hash_dentry(const struct dentry *dentry, const struct inode *inode,
135 struct qstr *this)
132{ 136{
133 if (!ncp_case_sensitive(dentry)) { 137 if (!ncp_case_sensitive(inode)) {
138 struct super_block *sb = dentry->d_sb;
134 struct nls_table *t; 139 struct nls_table *t;
135 unsigned long hash; 140 unsigned long hash;
136 int i; 141 int i;
137 142
138 t = NCP_IO_TABLE(dentry); 143 t = NCP_IO_TABLE(sb);
139 hash = init_name_hash(); 144 hash = init_name_hash();
140 for (i=0; i<this->len ; i++) 145 for (i=0; i<this->len ; i++)
141 hash = partial_name_hash(ncp_tolower(t, this->name[i]), 146 hash = partial_name_hash(ncp_tolower(t, this->name[i]),
@@ -146,15 +151,17 @@ ncp_hash_dentry(struct dentry *dentry, struct qstr *this)
146} 151}
147 152
148static int 153static int
149ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) 154ncp_compare_dentry(const struct dentry *parent, const struct inode *pinode,
155 const struct dentry *dentry, const struct inode *inode,
156 unsigned int len, const char *str, const struct qstr *name)
150{ 157{
151 if (a->len != b->len) 158 if (len != name->len)
152 return 1; 159 return 1;
153 160
154 if (ncp_case_sensitive(dentry)) 161 if (ncp_case_sensitive(pinode))
155 return strncmp(a->name, b->name, a->len); 162 return strncmp(str, name->name, len);
156 163
157 return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); 164 return ncp_strnicmp(NCP_IO_TABLE(pinode->i_sb), str, name->name, len);
158} 165}
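/*
 * Illustrative aside, not from the patch: a case-insensitive d_compare
 * only works if d_hash folds case the same way, so that names which
 * compare equal also hash equally -- which is what the two ncpfs hooks
 * above arrange via ncp_tolower().  Stand-alone sketch of that pairing
 * using the generic VFS name-hash helpers:
 */
static unsigned long example_ci_hash(const unsigned char *name, unsigned int len)
{
	unsigned long hash = init_name_hash();

	while (len--)
		hash = partial_name_hash(tolower(*name++), hash);
	return end_name_hash(hash);
}

static int example_ci_equal(const unsigned char *a, const unsigned char *b,
			    unsigned int len)
{
	while (len--)
		if (tolower(*a++) != tolower(*b++))
			return 0;
	return 1;
}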
159 166
160/* 167/*
@@ -163,7 +170,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b)
163 * Closing files can be safely postponed until iput() - it's done there anyway. 170 * Closing files can be safely postponed until iput() - it's done there anyway.
164 */ 171 */
165static int 172static int
166ncp_delete_dentry(struct dentry * dentry) 173ncp_delete_dentry(const struct dentry * dentry)
167{ 174{
168 struct inode *inode = dentry->d_inode; 175 struct inode *inode = dentry->d_inode;
169 176
@@ -302,6 +309,9 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
302 int res, val = 0, len; 309 int res, val = 0, len;
303 __u8 __name[NCP_MAXPATHLEN + 1]; 310 __u8 __name[NCP_MAXPATHLEN + 1];
304 311
312 if (nd->flags & LOOKUP_RCU)
313 return -ECHILD;
314
305 parent = dget_parent(dentry); 315 parent = dget_parent(dentry);
306 dir = parent->d_inode; 316 dir = parent->d_inode;
307 317
@@ -385,21 +395,21 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
385 } 395 }
386 396
387 /* If a pointer is invalid, we search the dentry. */ 397 /* If a pointer is invalid, we search the dentry. */
388 spin_lock(&dcache_lock); 398 spin_lock(&parent->d_lock);
389 next = parent->d_subdirs.next; 399 next = parent->d_subdirs.next;
390 while (next != &parent->d_subdirs) { 400 while (next != &parent->d_subdirs) {
391 dent = list_entry(next, struct dentry, d_u.d_child); 401 dent = list_entry(next, struct dentry, d_u.d_child);
392 if ((unsigned long)dent->d_fsdata == fpos) { 402 if ((unsigned long)dent->d_fsdata == fpos) {
393 if (dent->d_inode) 403 if (dent->d_inode)
394 dget_locked(dent); 404 dget(dent);
395 else 405 else
396 dent = NULL; 406 dent = NULL;
397 spin_unlock(&dcache_lock); 407 spin_unlock(&parent->d_lock);
398 goto out; 408 goto out;
399 } 409 }
400 next = next->next; 410 next = next->next;
401 } 411 }
402 spin_unlock(&dcache_lock); 412 spin_unlock(&parent->d_lock);
403 return NULL; 413 return NULL;
404 414
405out: 415out:
@@ -593,7 +603,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
593 qname.hash = full_name_hash(qname.name, qname.len); 603 qname.hash = full_name_hash(qname.name, qname.len);
594 604
595 if (dentry->d_op && dentry->d_op->d_hash) 605 if (dentry->d_op && dentry->d_op->d_hash)
596 if (dentry->d_op->d_hash(dentry, &qname) != 0) 606 if (dentry->d_op->d_hash(dentry, dentry->d_inode, &qname) != 0)
597 goto end_advance; 607 goto end_advance;
598 608
599 newdent = d_lookup(dentry, &qname); 609 newdent = d_lookup(dentry, &qname);
@@ -612,35 +622,12 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
612 shrink_dcache_parent(newdent); 622 shrink_dcache_parent(newdent);
613 623
614 /* 624 /*
615 * It is not as dangerous as it looks. NetWare's OS2 namespace is 625 * NetWare's OS2 namespace is case preserving yet case
616 * case preserving yet case insensitive. So we update dentry's name 626 * insensitive. So we update dentry's name as received from
617 * as received from server. We found dentry via d_lookup with our 627 * server. Parent dir's i_mutex is locked because we're in
618 * hash, so we know that hash does not change, and so replacing name 628 * readdir.
619 * should be reasonably safe.
620 */ 629 */
621 if (qname.len == newdent->d_name.len && 630 dentry_update_name_case(newdent, &qname);
622 memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) {
623 struct inode *inode = newdent->d_inode;
624
625 /*
626 * Inside ncpfs all uses of d_name are either for debugging,
627 * or on functions which acquire inode mutex (mknod, creat,
628 * lookup). So grab i_mutex here, to be sure. d_path
629 * uses dcache_lock when generating path, so we should too.
630 * And finally d_compare is protected by dentry's d_lock, so
631 * here we go.
632 */
633 if (inode)
634 mutex_lock(&inode->i_mutex);
635 spin_lock(&dcache_lock);
636 spin_lock(&newdent->d_lock);
637 memcpy((char *) newdent->d_name.name, qname.name,
638 newdent->d_name.len);
639 spin_unlock(&newdent->d_lock);
640 spin_unlock(&dcache_lock);
641 if (inode)
642 mutex_unlock(&inode->i_mutex);
643 }
644 } 631 }
645 632
646 if (!newdent->d_inode) { 633 if (!newdent->d_inode) {
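/*
 * Illustrative aside, not from the patch: dentry_update_name_case(),
 * called a few lines above, re-spells an already-hashed dentry in place.
 * Conceptual sketch with a hypothetical helper; the real one lives in
 * fs/dcache.c and, as the comment above notes, additionally relies on the
 * parent directory's i_mutex being held by the readdir caller.
 */
static void example_respell_dentry(struct dentry *dentry, const struct qstr *name)
{
	spin_lock(&dentry->d_lock);
	if (name->len == dentry->d_name.len)
		/* only the case changes, so the case-folding d_hash is unaffected */
		memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
	spin_unlock(&dentry->d_lock);
}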
@@ -650,7 +637,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
650 entry->ino = iunique(dir->i_sb, 2); 637 entry->ino = iunique(dir->i_sb, 2);
651 inode = ncp_iget(dir->i_sb, entry); 638 inode = ncp_iget(dir->i_sb, entry);
652 if (inode) { 639 if (inode) {
653 newdent->d_op = &ncp_dentry_operations; 640 d_set_d_op(newdent, &ncp_dentry_operations);
654 d_instantiate(newdent, inode); 641 d_instantiate(newdent, inode);
655 if (!hashed) 642 if (!hashed)
656 d_rehash(newdent); 643 d_rehash(newdent);
@@ -658,7 +645,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
658 } else { 645 } else {
659 struct inode *inode = newdent->d_inode; 646 struct inode *inode = newdent->d_inode;
660 647
661 mutex_lock(&inode->i_mutex); 648 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
662 ncp_update_inode2(inode, entry); 649 ncp_update_inode2(inode, entry);
663 mutex_unlock(&inode->i_mutex); 650 mutex_unlock(&inode->i_mutex);
664 } 651 }
@@ -906,7 +893,7 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc
906 if (inode) { 893 if (inode) {
907 ncp_new_dentry(dentry); 894 ncp_new_dentry(dentry);
908add_entry: 895add_entry:
909 dentry->d_op = &ncp_dentry_operations; 896 d_set_d_op(dentry, &ncp_dentry_operations);
910 d_add(dentry, inode); 897 d_add(dentry, inode);
911 error = 0; 898 error = 0;
912 } 899 }
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6c754f70c529..cb50aaf981df 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -17,7 +17,6 @@
17#include <linux/mm.h> 17#include <linux/mm.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/smp_lock.h>
21 20
22#include <linux/ncp_fs.h> 21#include <linux/ncp_fs.h>
23#include "ncplib_kernel.h" 22#include "ncplib_kernel.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d290545aa0c4..9b39a5dd4131 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -26,10 +26,10 @@
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/vmalloc.h> 27#include <linux/vmalloc.h>
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/smp_lock.h>
30#include <linux/vfs.h> 29#include <linux/vfs.h>
31#include <linux/mount.h> 30#include <linux/mount.h>
32#include <linux/seq_file.h> 31#include <linux/seq_file.h>
32#include <linux/namei.h>
33 33
34#include <linux/ncp_fs.h> 34#include <linux/ncp_fs.h>
35 35
@@ -59,11 +59,18 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
59 return &ei->vfs_inode; 59 return &ei->vfs_inode;
60} 60}
61 61
62static void ncp_destroy_inode(struct inode *inode) 62static void ncp_i_callback(struct rcu_head *head)
63{ 63{
64 struct inode *inode = container_of(head, struct inode, i_rcu);
65 INIT_LIST_HEAD(&inode->i_dentry);
64 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); 66 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
65} 67}
66 68
69static void ncp_destroy_inode(struct inode *inode)
70{
71 call_rcu(&inode->i_rcu, ncp_i_callback);
72}
73
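/*
 * Illustrative aside, not from the patch: the ncp_i_callback() pattern
 * above defers the slab free by one RCU grace period, which is what lets
 * lock-free path walking peek at an inode it reached under
 * rcu_read_lock().  Generic shape of the pattern, with a hypothetical
 * container type and cache:
 */
struct example_inode_info {
	/* filesystem-private fields would precede this */
	struct inode vfs_inode;
};

static struct kmem_cache *example_inode_cachep;

static void example_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(example_inode_cachep,
			container_of(inode, struct example_inode_info, vfs_inode));
}

static void example_destroy_inode(struct inode *inode)
{
	call_rcu(&inode->i_rcu, example_i_callback);
}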
67static void init_once(void *foo) 74static void init_once(void *foo)
68{ 75{
69 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; 76 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
@@ -310,7 +317,12 @@ static void ncp_stop_tasks(struct ncp_server *server) {
310 sk->sk_write_space = server->write_space; 317 sk->sk_write_space = server->write_space;
311 release_sock(sk); 318 release_sock(sk);
312 del_timer_sync(&server->timeout_tm); 319 del_timer_sync(&server->timeout_tm);
313 flush_scheduled_work(); 320
321 flush_work_sync(&server->rcv.tq);
322 if (sk->sk_socket->type == SOCK_STREAM)
323 flush_work_sync(&server->tx.tq);
324 else
325 flush_work_sync(&server->timeout_tq);
314} 326}
315 327
316static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt) 328static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt)
@@ -711,7 +723,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
711 sb->s_root = d_alloc_root(root_inode); 723 sb->s_root = d_alloc_root(root_inode);
712 if (!sb->s_root) 724 if (!sb->s_root)
713 goto out_no_root; 725 goto out_no_root;
714 sb->s_root->d_op = &ncp_root_dentry_operations; 726 d_set_d_op(sb->s_root, &ncp_root_dentry_operations);
715 return 0; 727 return 0;
716 728
717out_no_root: 729out_no_root:
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c2a1f9a155c3..d40a547e3377 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -17,7 +17,6 @@
17#include <linux/mount.h> 17#include <linux/mount.h>
18#include <linux/slab.h> 18#include <linux/slab.h>
19#include <linux/highuid.h> 19#include <linux/highuid.h>
20#include <linux/smp_lock.h>
21#include <linux/vmalloc.h> 20#include <linux/vmalloc.h>
22#include <linux/sched.h> 21#include <linux/sched.h>
23 22
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 3c57eca634ce..1220df75ff22 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -135,7 +135,7 @@ int ncp__vol2io(struct ncp_server *, unsigned char *, unsigned int *,
135 const unsigned char *, unsigned int, int); 135 const unsigned char *, unsigned int, int);
136 136
137#define NCP_ESC ':' 137#define NCP_ESC ':'
138#define NCP_IO_TABLE(dentry) (NCP_SERVER((dentry)->d_inode)->nls_io) 138#define NCP_IO_TABLE(sb) (NCP_SBP(sb)->nls_io)
139#define ncp_tolower(t, c) nls_tolower(t, c) 139#define ncp_tolower(t, c) nls_tolower(t, c)
140#define ncp_toupper(t, c) nls_toupper(t, c) 140#define ncp_toupper(t, c) nls_toupper(t, c)
141#define ncp_strnicmp(t, s1, s2, len) \ 141#define ncp_strnicmp(t, s1, s2, len) \
@@ -150,15 +150,15 @@ int ncp__io2vol(unsigned char *, unsigned int *,
150int ncp__vol2io(unsigned char *, unsigned int *, 150int ncp__vol2io(unsigned char *, unsigned int *,
151 const unsigned char *, unsigned int, int); 151 const unsigned char *, unsigned int, int);
152 152
153#define NCP_IO_TABLE(dentry) NULL 153#define NCP_IO_TABLE(sb) NULL
154#define ncp_tolower(t, c) tolower(c) 154#define ncp_tolower(t, c) tolower(c)
155#define ncp_toupper(t, c) toupper(c) 155#define ncp_toupper(t, c) toupper(c)
156#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U) 156#define ncp_io2vol(S,m,i,n,k,U) ncp__io2vol(m,i,n,k,U)
157#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U) 157#define ncp_vol2io(S,m,i,n,k,U) ncp__vol2io(m,i,n,k,U)
158 158
159 159
160static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1, 160static inline int ncp_strnicmp(const struct nls_table *t,
161 const unsigned char *s2, int len) 161 const unsigned char *s1, const unsigned char *s2, int len)
162{ 162{
163 while (len--) { 163 while (len--) {
164 if (tolower(*s1++) != tolower(*s2++)) 164 if (tolower(*s1++) != tolower(*s2++))
@@ -193,7 +193,7 @@ ncp_renew_dentries(struct dentry *parent)
193 struct list_head *next; 193 struct list_head *next;
194 struct dentry *dentry; 194 struct dentry *dentry;
195 195
196 spin_lock(&dcache_lock); 196 spin_lock(&parent->d_lock);
197 next = parent->d_subdirs.next; 197 next = parent->d_subdirs.next;
198 while (next != &parent->d_subdirs) { 198 while (next != &parent->d_subdirs) {
199 dentry = list_entry(next, struct dentry, d_u.d_child); 199 dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -205,7 +205,7 @@ ncp_renew_dentries(struct dentry *parent)
205 205
206 next = next->next; 206 next = next->next;
207 } 207 }
208 spin_unlock(&dcache_lock); 208 spin_unlock(&parent->d_lock);
209} 209}
210 210
211static inline void 211static inline void
@@ -215,7 +215,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
215 struct list_head *next; 215 struct list_head *next;
216 struct dentry *dentry; 216 struct dentry *dentry;
217 217
218 spin_lock(&dcache_lock); 218 spin_lock(&parent->d_lock);
219 next = parent->d_subdirs.next; 219 next = parent->d_subdirs.next;
220 while (next != &parent->d_subdirs) { 220 while (next != &parent->d_subdirs) {
221 dentry = list_entry(next, struct dentry, d_u.d_child); 221 dentry = list_entry(next, struct dentry, d_u.d_child);
@@ -223,7 +223,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
223 ncp_age_dentry(server, dentry); 223 ncp_age_dentry(server, dentry);
224 next = next->next; 224 next = next->next;
225 } 225 }
226 spin_unlock(&dcache_lock); 226 spin_unlock(&parent->d_lock);
227} 227}
228 228
229struct ncp_cache_head { 229struct ncp_cache_head {
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index aeec017fe814..93a8b3bd69e3 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -9,7 +9,6 @@
9#include <linux/completion.h> 9#include <linux/completion.h>
10#include <linux/ip.h> 10#include <linux/ip.h>
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/smp_lock.h>
13#include <linux/sunrpc/svc.h> 12#include <linux/sunrpc/svc.h>
14#include <linux/sunrpc/svcsock.h> 13#include <linux/sunrpc/svcsock.h>
15#include <linux/nfs_fs.h> 14#include <linux/nfs_fs.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 232a7eead33a..1fd62fc49be3 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -11,7 +11,6 @@
11#include <linux/module.h> 11#include <linux/module.h>
12#include <linux/sched.h> 12#include <linux/sched.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/smp_lock.h>
15#include <linux/spinlock.h> 14#include <linux/spinlock.h>
16 15
17#include <linux/nfs4.h> 16#include <linux/nfs4.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 07ac3847e562..d33da530097a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
34#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
37#include <linux/kmemleak.h>
37 38
38#include "delegation.h" 39#include "delegation.h"
39#include "iostat.h" 40#include "iostat.h"
@@ -56,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *,
56 struct inode *, struct dentry *); 57 struct inode *, struct dentry *);
57static int nfs_fsync_dir(struct file *, int); 58static int nfs_fsync_dir(struct file *, int);
58static loff_t nfs_llseek_dir(struct file *, loff_t, int); 59static loff_t nfs_llseek_dir(struct file *, loff_t, int);
59static int nfs_readdir_clear_array(struct page*, gfp_t); 60static void nfs_readdir_clear_array(struct page*);
60 61
61const struct file_operations nfs_dir_operations = { 62const struct file_operations nfs_dir_operations = {
62 .llseek = nfs_llseek_dir, 63 .llseek = nfs_llseek_dir,
@@ -82,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = {
82 .setattr = nfs_setattr, 83 .setattr = nfs_setattr,
83}; 84};
84 85
85const struct address_space_operations nfs_dir_addr_space_ops = { 86const struct address_space_operations nfs_dir_aops = {
86 .releasepage = nfs_readdir_clear_array, 87 .freepage = nfs_readdir_clear_array,
87}; 88};
88 89
89#ifdef CONFIG_NFS_V3 90#ifdef CONFIG_NFS_V3
@@ -161,6 +162,7 @@ struct nfs_cache_array_entry {
161 u64 cookie; 162 u64 cookie;
162 u64 ino; 163 u64 ino;
163 struct qstr string; 164 struct qstr string;
165 unsigned char d_type;
164}; 166};
165 167
166struct nfs_cache_array { 168struct nfs_cache_array {
@@ -170,14 +172,13 @@ struct nfs_cache_array {
170 struct nfs_cache_array_entry array[0]; 172 struct nfs_cache_array_entry array[0];
171}; 173};
172 174
173#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
174
175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); 175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
176typedef struct { 176typedef struct {
177 struct file *file; 177 struct file *file;
178 struct page *page; 178 struct page *page;
179 unsigned long page_index; 179 unsigned long page_index;
180 u64 *dir_cookie; 180 u64 *dir_cookie;
181 u64 last_cookie;
181 loff_t current_index; 182 loff_t current_index;
182 decode_dirent_t decode; 183 decode_dirent_t decode;
183 184
@@ -194,9 +195,13 @@ typedef struct {
194static 195static
195struct nfs_cache_array *nfs_readdir_get_array(struct page *page) 196struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
196{ 197{
198 void *ptr;
197 if (page == NULL) 199 if (page == NULL)
198 return ERR_PTR(-EIO); 200 return ERR_PTR(-EIO);
199 return (struct nfs_cache_array *)kmap(page); 201 ptr = kmap(page);
202 if (ptr == NULL)
203 return ERR_PTR(-ENOMEM);
204 return ptr;
200} 205}
201 206
202static 207static
@@ -209,14 +214,15 @@ void nfs_readdir_release_array(struct page *page)
209 * we are freeing strings created by nfs_add_to_readdir_array() 214 * we are freeing strings created by nfs_add_to_readdir_array()
210 */ 215 */
211static 216static
212int nfs_readdir_clear_array(struct page *page, gfp_t mask) 217void nfs_readdir_clear_array(struct page *page)
213{ 218{
214 struct nfs_cache_array *array = nfs_readdir_get_array(page); 219 struct nfs_cache_array *array;
215 int i; 220 int i;
221
222 array = kmap_atomic(page, KM_USER0);
216 for (i = 0; i < array->size; i++) 223 for (i = 0; i < array->size; i++)
217 kfree(array->array[i].string.name); 224 kfree(array->array[i].string.name);
218 nfs_readdir_release_array(page); 225 kunmap_atomic(array, KM_USER0);
219 return 0;
220} 226}
221 227
222/* 228/*
@@ -231,6 +237,11 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
231 string->name = kmemdup(name, len, GFP_KERNEL); 237 string->name = kmemdup(name, len, GFP_KERNEL);
232 if (string->name == NULL) 238 if (string->name == NULL)
233 return -ENOMEM; 239 return -ENOMEM;
240 /*
241 * Avoid a kmemleak false positive. The pointer to the name is stored
242 * in a page cache page which kmemleak does not scan.
243 */
244 kmemleak_not_leak(string->name);
234 string->hash = full_name_hash(name, len); 245 string->hash = full_name_hash(name, len);
235 return 0; 246 return 0;
236} 247}
@@ -244,20 +255,24 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
244 255
245 if (IS_ERR(array)) 256 if (IS_ERR(array))
246 return PTR_ERR(array); 257 return PTR_ERR(array);
247 ret = -EIO;
248 if (array->size >= MAX_READDIR_ARRAY)
249 goto out;
250 258
251 cache_entry = &array->array[array->size]; 259 cache_entry = &array->array[array->size];
260
261 /* Check that this entry lies within the page bounds */
262 ret = -ENOSPC;
263 if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
264 goto out;
265
252 cache_entry->cookie = entry->prev_cookie; 266 cache_entry->cookie = entry->prev_cookie;
253 cache_entry->ino = entry->ino; 267 cache_entry->ino = entry->ino;
268 cache_entry->d_type = entry->d_type;
254 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); 269 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
255 if (ret) 270 if (ret)
256 goto out; 271 goto out;
257 array->last_cookie = entry->cookie; 272 array->last_cookie = entry->cookie;
258 if (entry->eof == 1)
259 array->eof_index = array->size;
260 array->size++; 273 array->size++;
274 if (entry->eof != 0)
275 array->eof_index = array->size;
261out: 276out:
262 nfs_readdir_release_array(page); 277 nfs_readdir_release_array(page);
263 return ret; 278 return ret;
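/*
 * Illustrative aside, not from the patch: the -ENOSPC test above asks
 * "does the end of the would-be entry still lie inside the page the
 * array is kmapped from?".  Same check written as a predicate, plus a
 * worked instance with made-up sizes (4096-byte page, 40-byte array
 * header, 56-byte entries): entry n fits iff 40 + (n + 1) * 56 <= 4096,
 * i.e. n <= 71, after which the caller simply starts a new page.
 */
static int example_entry_fits(struct page *page,
			      struct nfs_cache_array_entry *cache_entry)
{
	return (char *)&cache_entry[1] - (char *)page_address(page) <= PAGE_SIZE;
}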
@@ -272,7 +287,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
272 if (diff < 0) 287 if (diff < 0)
273 goto out_eof; 288 goto out_eof;
274 if (diff >= array->size) { 289 if (diff >= array->size) {
275 if (array->eof_index > 0) 290 if (array->eof_index >= 0)
276 goto out_eof; 291 goto out_eof;
277 desc->current_index += array->size; 292 desc->current_index += array->size;
278 return -EAGAIN; 293 return -EAGAIN;
@@ -281,8 +296,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
281 index = (unsigned int)diff; 296 index = (unsigned int)diff;
282 *desc->dir_cookie = array->array[index].cookie; 297 *desc->dir_cookie = array->array[index].cookie;
283 desc->cache_entry_index = index; 298 desc->cache_entry_index = index;
284 if (index == array->eof_index)
285 desc->eof = 1;
286 return 0; 299 return 0;
287out_eof: 300out_eof:
288 desc->eof = 1; 301 desc->eof = 1;
@@ -296,17 +309,16 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
296 int status = -EAGAIN; 309 int status = -EAGAIN;
297 310
298 for (i = 0; i < array->size; i++) { 311 for (i = 0; i < array->size; i++) {
299 if (i == array->eof_index) {
300 desc->eof = 1;
301 status = -EBADCOOKIE;
302 }
303 if (array->array[i].cookie == *desc->dir_cookie) { 312 if (array->array[i].cookie == *desc->dir_cookie) {
304 desc->cache_entry_index = i; 313 desc->cache_entry_index = i;
305 status = 0; 314 return 0;
306 break;
307 } 315 }
308 } 316 }
309 317 if (array->eof_index >= 0) {
318 status = -EBADCOOKIE;
319 if (*desc->dir_cookie == array->last_cookie)
320 desc->eof = 1;
321 }
310 return status; 322 return status;
311} 323}
312 324
@@ -314,10 +326,7 @@ static
314int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) 326int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
315{ 327{
316 struct nfs_cache_array *array; 328 struct nfs_cache_array *array;
317 int status = -EBADCOOKIE; 329 int status;
318
319 if (desc->dir_cookie == NULL)
320 goto out;
321 330
322 array = nfs_readdir_get_array(desc->page); 331 array = nfs_readdir_get_array(desc->page);
323 if (IS_ERR(array)) { 332 if (IS_ERR(array)) {
@@ -330,6 +339,10 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
330 else 339 else
331 status = nfs_readdir_search_for_cookie(array, desc); 340 status = nfs_readdir_search_for_cookie(array, desc);
332 341
342 if (status == -EAGAIN) {
343 desc->last_cookie = array->last_cookie;
344 desc->page_index++;
345 }
333 nfs_readdir_release_array(desc->page); 346 nfs_readdir_release_array(desc->page);
334out: 347out:
335 return status; 348 return status;
@@ -381,13 +394,9 @@ int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct x
381static 394static
382int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) 395int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
383{ 396{
384 struct nfs_inode *node;
385 if (dentry->d_inode == NULL) 397 if (dentry->d_inode == NULL)
386 goto different; 398 goto different;
387 node = NFS_I(dentry->d_inode); 399 if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
388 if (node->fh.size != entry->fh->size)
389 goto different;
390 if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
391 goto different; 400 goto different;
392 return 1; 401 return 1;
393different: 402different:
@@ -429,7 +438,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
429 if (dentry == NULL) 438 if (dentry == NULL)
430 return; 439 return;
431 440
432 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 441 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
433 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); 442 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
434 if (IS_ERR(inode)) 443 if (IS_ERR(inode))
435 goto out; 444 goto out;
@@ -449,14 +458,15 @@ out:
449 458
450/* Perform conversion from xdr to cache array */ 459/* Perform conversion from xdr to cache array */
451static 460static
452void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, 461int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
453 void *xdr_page, struct page *page, unsigned int buflen) 462 void *xdr_page, struct page *page, unsigned int buflen)
454{ 463{
455 struct xdr_stream stream; 464 struct xdr_stream stream;
456 struct xdr_buf buf; 465 struct xdr_buf buf;
457 __be32 *ptr = xdr_page; 466 __be32 *ptr = xdr_page;
458 int status;
459 struct nfs_cache_array *array; 467 struct nfs_cache_array *array;
468 unsigned int count = 0;
469 int status;
460 470
461 buf.head->iov_base = xdr_page; 471 buf.head->iov_base = xdr_page;
462 buf.head->iov_len = buflen; 472 buf.head->iov_len = buflen;
@@ -471,21 +481,32 @@ void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *e
471 481
472 do { 482 do {
473 status = xdr_decode(desc, entry, &stream); 483 status = xdr_decode(desc, entry, &stream);
474 if (status != 0) 484 if (status != 0) {
485 if (status == -EAGAIN)
486 status = 0;
475 break; 487 break;
488 }
476 489
477 if (nfs_readdir_add_to_array(entry, page) == -1) 490 count++;
478 break; 491
479 if (desc->plus == 1) 492 if (desc->plus != 0)
480 nfs_prime_dcache(desc->file->f_path.dentry, entry); 493 nfs_prime_dcache(desc->file->f_path.dentry, entry);
494
495 status = nfs_readdir_add_to_array(entry, page);
496 if (status != 0)
497 break;
481 } while (!entry->eof); 498 } while (!entry->eof);
482 499
483 if (status == -EBADCOOKIE && entry->eof) { 500 if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
484 array = nfs_readdir_get_array(page); 501 array = nfs_readdir_get_array(page);
485 array->eof_index = array->size - 1; 502 if (!IS_ERR(array)) {
486 status = 0; 503 array->eof_index = array->size;
487 nfs_readdir_release_array(page); 504 status = 0;
505 nfs_readdir_release_array(page);
506 } else
507 status = PTR_ERR(array);
488 } 508 }
509 return status;
489} 510}
490 511
491static 512static
@@ -537,11 +558,11 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
537 struct nfs_entry entry; 558 struct nfs_entry entry;
538 struct file *file = desc->file; 559 struct file *file = desc->file;
539 struct nfs_cache_array *array; 560 struct nfs_cache_array *array;
540 int status = 0; 561 int status = -ENOMEM;
541 unsigned int array_size = ARRAY_SIZE(pages); 562 unsigned int array_size = ARRAY_SIZE(pages);
542 563
543 entry.prev_cookie = 0; 564 entry.prev_cookie = 0;
544 entry.cookie = *desc->dir_cookie; 565 entry.cookie = desc->last_cookie;
545 entry.eof = 0; 566 entry.eof = 0;
546 entry.fh = nfs_alloc_fhandle(); 567 entry.fh = nfs_alloc_fhandle();
547 entry.fattr = nfs_alloc_fattr(); 568 entry.fattr = nfs_alloc_fattr();
@@ -549,6 +570,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
549 goto out; 570 goto out;
550 571
551 array = nfs_readdir_get_array(page); 572 array = nfs_readdir_get_array(page);
573 if (IS_ERR(array)) {
574 status = PTR_ERR(array);
575 goto out;
576 }
552 memset(array, 0, sizeof(struct nfs_cache_array)); 577 memset(array, 0, sizeof(struct nfs_cache_array));
553 array->eof_index = -1; 578 array->eof_index = -1;
554 579
@@ -556,12 +581,19 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
556 if (!pages_ptr) 581 if (!pages_ptr)
557 goto out_release_array; 582 goto out_release_array;
558 do { 583 do {
584 unsigned int pglen;
559 status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); 585 status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
560 586
561 if (status < 0) 587 if (status < 0)
562 break; 588 break;
563 nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE); 589 pglen = status;
564 } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY); 590 status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
591 if (status < 0) {
592 if (status == -ENOSPC)
593 status = 0;
594 break;
595 }
596 } while (array->eof_index < 0);
565 597
566 nfs_readdir_free_large_page(pages_ptr, pages, array_size); 598 nfs_readdir_free_large_page(pages_ptr, pages, array_size);
567out_release_array: 599out_release_array:
@@ -582,8 +614,10 @@ static
582int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) 614int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
583{ 615{
584 struct inode *inode = desc->file->f_path.dentry->d_inode; 616 struct inode *inode = desc->file->f_path.dentry->d_inode;
617 int ret;
585 618
586 if (nfs_readdir_xdr_to_array(desc, page, inode) < 0) 619 ret = nfs_readdir_xdr_to_array(desc, page, inode);
620 if (ret < 0)
587 goto error; 621 goto error;
588 SetPageUptodate(page); 622 SetPageUptodate(page);
589 623
@@ -595,12 +629,14 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
595 return 0; 629 return 0;
596 error: 630 error:
597 unlock_page(page); 631 unlock_page(page);
598 return -EIO; 632 return ret;
599} 633}
600 634
601static 635static
602void cache_page_release(nfs_readdir_descriptor_t *desc) 636void cache_page_release(nfs_readdir_descriptor_t *desc)
603{ 637{
638 if (!desc->page->mapping)
639 nfs_readdir_clear_array(desc->page);
604 page_cache_release(desc->page); 640 page_cache_release(desc->page);
605 desc->page = NULL; 641 desc->page = NULL;
606} 642}
@@ -608,12 +644,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
608static 644static
609struct page *get_cache_page(nfs_readdir_descriptor_t *desc) 645struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
610{ 646{
611 struct page *page; 647 return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
612 page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
613 desc->page_index, (filler_t *)nfs_readdir_filler, desc); 648 desc->page_index, (filler_t *)nfs_readdir_filler, desc);
614 if (IS_ERR(page))
615 desc->eof = 1;
616 return page;
617} 649}
618 650
619/* 651/*
@@ -629,9 +661,8 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
629 return PTR_ERR(desc->page); 661 return PTR_ERR(desc->page);
630 662
631 res = nfs_readdir_search_array(desc); 663 res = nfs_readdir_search_array(desc);
632 if (res == 0) 664 if (res != 0)
633 return 0; 665 cache_page_release(desc);
634 cache_page_release(desc);
635 return res; 666 return res;
636} 667}
637 668
@@ -639,22 +670,18 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
639static inline 670static inline
640int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) 671int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
641{ 672{
642 int res = -EAGAIN; 673 int res;
643 674
644 while (1) { 675 if (desc->page_index == 0) {
645 res = find_cache_page(desc); 676 desc->current_index = 0;
646 if (res != -EAGAIN) 677 desc->last_cookie = 0;
647 break;
648 desc->page_index++;
649 } 678 }
679 do {
680 res = find_cache_page(desc);
681 } while (res == -EAGAIN);
650 return res; 682 return res;
651} 683}
652 684
653static inline unsigned int dt_type(struct inode *inode)
654{
655 return (inode->i_mode >> 12) & 15;
656}
657
658/* 685/*
659 * Once we've found the start of the dirent within a page: fill 'er up... 686 * Once we've found the start of the dirent within a page: fill 'er up...
660 */ 687 */
@@ -666,35 +693,35 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
666 int i = 0; 693 int i = 0;
667 int res = 0; 694 int res = 0;
668 struct nfs_cache_array *array = NULL; 695 struct nfs_cache_array *array = NULL;
669 unsigned int d_type = DT_UNKNOWN;
670 struct dentry *dentry = NULL;
671 696
672 array = nfs_readdir_get_array(desc->page); 697 array = nfs_readdir_get_array(desc->page);
698 if (IS_ERR(array)) {
699 res = PTR_ERR(array);
700 goto out;
701 }
673 702
674 for (i = desc->cache_entry_index; i < array->size; i++) { 703 for (i = desc->cache_entry_index; i < array->size; i++) {
675 d_type = DT_UNKNOWN; 704 struct nfs_cache_array_entry *ent;
676 705
677 res = filldir(dirent, array->array[i].string.name, 706 ent = &array->array[i];
678 array->array[i].string.len, file->f_pos, 707 if (filldir(dirent, ent->string.name, ent->string.len,
679 nfs_compat_user_ino64(array->array[i].ino), d_type); 708 file->f_pos, nfs_compat_user_ino64(ent->ino),
680 if (res < 0) 709 ent->d_type) < 0) {
710 desc->eof = 1;
681 break; 711 break;
712 }
682 file->f_pos++; 713 file->f_pos++;
683 desc->cache_entry_index = i;
684 if (i < (array->size-1)) 714 if (i < (array->size-1))
685 *desc->dir_cookie = array->array[i+1].cookie; 715 *desc->dir_cookie = array->array[i+1].cookie;
686 else 716 else
687 *desc->dir_cookie = array->last_cookie; 717 *desc->dir_cookie = array->last_cookie;
688 if (i == array->eof_index) {
689 desc->eof = 1;
690 break;
691 }
692 } 718 }
719 if (array->eof_index >= 0)
720 desc->eof = 1;
693 721
694 nfs_readdir_release_array(desc->page); 722 nfs_readdir_release_array(desc->page);
723out:
695 cache_page_release(desc); 724 cache_page_release(desc);
696 if (dentry != NULL)
697 dput(dentry);
698 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", 725 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
699 (unsigned long long)*desc->dir_cookie, res); 726 (unsigned long long)*desc->dir_cookie, res);
700 return res; 727 return res;
@@ -729,13 +756,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
729 goto out; 756 goto out;
730 } 757 }
731 758
732 if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
733 status = -EIO;
734 goto out_release;
735 }
736
737 desc->page_index = 0; 759 desc->page_index = 0;
760 desc->last_cookie = *desc->dir_cookie;
738 desc->page = page; 761 desc->page = page;
762
763 status = nfs_readdir_xdr_to_array(desc, page, inode);
764 if (status < 0)
765 goto out_release;
766
739 status = nfs_do_filldir(desc, dirent, filldir); 767 status = nfs_do_filldir(desc, dirent, filldir);
740 768
741 out: 769 out:
@@ -757,7 +785,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
757 struct inode *inode = dentry->d_inode; 785 struct inode *inode = dentry->d_inode;
758 nfs_readdir_descriptor_t my_desc, 786 nfs_readdir_descriptor_t my_desc,
759 *desc = &my_desc; 787 *desc = &my_desc;
760 int res = -ENOMEM; 788 int res;
761 789
762 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 790 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
763 dentry->d_parent->d_name.name, dentry->d_name.name, 791 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -782,18 +810,18 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
782 if (res < 0) 810 if (res < 0)
783 goto out; 811 goto out;
784 812
785 while (desc->eof != 1) { 813 do {
786 res = readdir_search_pagecache(desc); 814 res = readdir_search_pagecache(desc);
787 815
788 if (res == -EBADCOOKIE) { 816 if (res == -EBADCOOKIE) {
817 res = 0;
789 /* This means either end of directory */ 818 /* This means either end of directory */
790 if (*desc->dir_cookie && desc->eof == 0) { 819 if (*desc->dir_cookie && desc->eof == 0) {
791 /* Or that the server has 'lost' a cookie */ 820 /* Or that the server has 'lost' a cookie */
792 res = uncached_readdir(desc, dirent, filldir); 821 res = uncached_readdir(desc, dirent, filldir);
793 if (res >= 0) 822 if (res == 0)
794 continue; 823 continue;
795 } 824 }
796 res = 0;
797 break; 825 break;
798 } 826 }
799 if (res == -ETOOSMALL && desc->plus) { 827 if (res == -ETOOSMALL && desc->plus) {
@@ -808,11 +836,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
808 break; 836 break;
809 837
810 res = nfs_do_filldir(desc, dirent, filldir); 838 res = nfs_do_filldir(desc, dirent, filldir);
811 if (res < 0) { 839 if (res < 0)
812 res = 0;
813 break; 840 break;
814 } 841 } while (!desc->eof);
815 }
816out: 842out:
817 nfs_unblock_sillyrename(dentry); 843 nfs_unblock_sillyrename(dentry);
818 if (res > 0) 844 if (res > 0)
@@ -912,7 +938,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
912 * component of the path. 938 * component of the path.
913 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. 939 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT.
914 */ 940 */
915static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigned int mask) 941static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
942 unsigned int mask)
916{ 943{
917 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT)) 944 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
918 return 0; 945 return 0;
@@ -992,7 +1019,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
992 * If the parent directory is seen to have changed, we throw out the 1019 * If the parent directory is seen to have changed, we throw out the
993 * cached dentry and do a new lookup. 1020 * cached dentry and do a new lookup.
994 */ 1021 */
995static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) 1022static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
996{ 1023{
997 struct inode *dir; 1024 struct inode *dir;
998 struct inode *inode; 1025 struct inode *inode;
@@ -1001,6 +1028,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
1001 struct nfs_fattr *fattr = NULL; 1028 struct nfs_fattr *fattr = NULL;
1002 int error; 1029 int error;
1003 1030
1031 if (nd->flags & LOOKUP_RCU)
1032 return -ECHILD;
1033
1004 parent = dget_parent(dentry); 1034 parent = dget_parent(dentry);
1005 dir = parent->d_inode; 1035 dir = parent->d_inode;
1006 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); 1036 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
@@ -1091,7 +1121,7 @@ out_error:
1091/* 1121/*
1092 * This is called from dput() when d_count is going to 0. 1122 * This is called from dput() when d_count is going to 0.
1093 */ 1123 */
1094static int nfs_dentry_delete(struct dentry *dentry) 1124static int nfs_dentry_delete(const struct dentry *dentry)
1095{ 1125{
1096 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n", 1126 dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
1097 dentry->d_parent->d_name.name, dentry->d_name.name, 1127 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -1162,7 +1192,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1162 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) 1192 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
1163 goto out; 1193 goto out;
1164 1194
1165 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 1195 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
1166 1196
1167 /* 1197 /*
1168 * If we're doing an exclusive create, optimize away the lookup 1198 * If we're doing an exclusive create, optimize away the lookup
@@ -1307,7 +1337,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1307 res = ERR_PTR(-ENAMETOOLONG); 1337 res = ERR_PTR(-ENAMETOOLONG);
1308 goto out; 1338 goto out;
1309 } 1339 }
1310 dentry->d_op = NFS_PROTO(dir)->dentry_ops; 1340 d_set_d_op(dentry, NFS_PROTO(dir)->dentry_ops);
1311 1341
1312 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash 1342 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
1313 * the dentry. */ 1343 * the dentry. */
@@ -1345,12 +1375,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1345 res = NULL; 1375 res = NULL;
1346 goto out; 1376 goto out;
1347 /* This turned out not to be a regular file */ 1377 /* This turned out not to be a regular file */
1348 case -EISDIR:
1349 case -ENOTDIR: 1378 case -ENOTDIR:
1350 goto no_open; 1379 goto no_open;
1351 case -ELOOP: 1380 case -ELOOP:
1352 if (!(nd->intent.open.flags & O_NOFOLLOW)) 1381 if (!(nd->intent.open.flags & O_NOFOLLOW))
1353 goto no_open; 1382 goto no_open;
1383 /* case -EISDIR: */
1354 /* case -EINVAL: */ 1384 /* case -EINVAL: */
1355 default: 1385 default:
1356 res = ERR_CAST(inode); 1386 res = ERR_CAST(inode);
@@ -1692,11 +1722,9 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1692 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id, 1722 dfprintk(VFS, "NFS: unlink(%s/%ld, %s)\n", dir->i_sb->s_id,
1693 dir->i_ino, dentry->d_name.name); 1723 dir->i_ino, dentry->d_name.name);
1694 1724
1695 spin_lock(&dcache_lock);
1696 spin_lock(&dentry->d_lock); 1725 spin_lock(&dentry->d_lock);
1697 if (atomic_read(&dentry->d_count) > 1) { 1726 if (dentry->d_count > 1) {
1698 spin_unlock(&dentry->d_lock); 1727 spin_unlock(&dentry->d_lock);
1699 spin_unlock(&dcache_lock);
1700 /* Start asynchronous writeout of the inode */ 1728 /* Start asynchronous writeout of the inode */
1701 write_inode_now(dentry->d_inode, 0); 1729 write_inode_now(dentry->d_inode, 0);
1702 error = nfs_sillyrename(dir, dentry); 1730 error = nfs_sillyrename(dir, dentry);
@@ -1707,7 +1735,6 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
1707 need_rehash = 1; 1735 need_rehash = 1;
1708 } 1736 }
1709 spin_unlock(&dentry->d_lock); 1737 spin_unlock(&dentry->d_lock);
1710 spin_unlock(&dcache_lock);
1711 error = nfs_safe_remove(dentry); 1738 error = nfs_safe_remove(dentry);
1712 if (!error || error == -ENOENT) { 1739 if (!error || error == -ENOENT) {
1713 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1740 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -1842,7 +1869,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1842 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", 1869 dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n",
1843 old_dentry->d_parent->d_name.name, old_dentry->d_name.name, 1870 old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
1844 new_dentry->d_parent->d_name.name, new_dentry->d_name.name, 1871 new_dentry->d_parent->d_name.name, new_dentry->d_name.name,
1845 atomic_read(&new_dentry->d_count)); 1872 new_dentry->d_count);
1846 1873
1847 /* 1874 /*
1848 * For non-directories, check whether the target is busy and if so, 1875 * For non-directories, check whether the target is busy and if so,
@@ -1860,7 +1887,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1860 rehash = new_dentry; 1887 rehash = new_dentry;
1861 } 1888 }
1862 1889
1863 if (atomic_read(&new_dentry->d_count) > 2) { 1890 if (new_dentry->d_count > 2) {
1864 int err; 1891 int err;
1865 1892
1866 /* copy the target dentry's name */ 1893 /* copy the target dentry's name */
@@ -2162,11 +2189,14 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
2162 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 2189 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
2163} 2190}
2164 2191
2165int nfs_permission(struct inode *inode, int mask) 2192int nfs_permission(struct inode *inode, int mask, unsigned int flags)
2166{ 2193{
2167 struct rpc_cred *cred; 2194 struct rpc_cred *cred;
2168 int res = 0; 2195 int res = 0;
2169 2196
2197 if (flags & IPERM_FLAG_RCU)
2198 return -ECHILD;
2199
2170 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2200 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2171 2201
2172 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) 2202 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@@ -2214,7 +2244,7 @@ out:
2214out_notsup: 2244out_notsup:
2215 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2245 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2216 if (res == 0) 2246 if (res == 0)
2217 res = generic_permission(inode, mask, NULL); 2247 res = generic_permission(inode, mask, flags, NULL);
2218 goto out; 2248 goto out;
2219} 2249}
2220 2250
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 84d3c8b90206..e6ace0d93c71 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -867,7 +867,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
867 goto out; 867 goto out;
868 nfs_alloc_commit_data(dreq); 868 nfs_alloc_commit_data(dreq);
869 869
870 if (dreq->commit_data == NULL || count < wsize) 870 if (dreq->commit_data == NULL || count <= wsize)
871 sync = NFS_FILE_SYNC; 871 sync = NFS_FILE_SYNC;
872 872
873 dreq->inode = inode; 873 dreq->inode = inode;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 60677f9f1311..7bf029ef4084 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -693,6 +693,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
693{ 693{
694 struct inode *inode = filp->f_mapping->host; 694 struct inode *inode = filp->f_mapping->host;
695 int status = 0; 695 int status = 0;
696 unsigned int saved_type = fl->fl_type;
696 697
697 /* Try local locking first */ 698 /* Try local locking first */
698 posix_test_lock(filp, fl); 699 posix_test_lock(filp, fl);
@@ -700,6 +701,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
700 /* found a conflict */ 701 /* found a conflict */
701 goto out; 702 goto out;
702 } 703 }
704 fl->fl_type = saved_type;
703 705
704 if (nfs_have_delegation(inode, FMODE_READ)) 706 if (nfs_have_delegation(inode, FMODE_READ))
705 goto out_noconflict; 707 goto out_noconflict;
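/*
 * Illustrative aside, not from the patch: why fl_type is saved and
 * restored above.  posix_test_lock() signals "no local conflict" by
 * rewriting fl->fl_type to F_UNLCK, so the original request type has to
 * be put back before the very same file_lock is forwarded to the server
 * for the remote F_GETLK.  Shape of the fix in isolation:
 */
static void example_getlk(struct file *filp, struct file_lock *fl)
{
	unsigned int saved_type = fl->fl_type;

	posix_test_lock(filp, fl);		/* may overwrite fl->fl_type */
	if (fl->fl_type != F_UNLCK)
		return;				/* a local lock already conflicts */
	fl->fl_type = saved_type;		/* restore before asking the server */
	/* ... the NFS-specific GETLK call would follow here ... */
}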
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index ac7b814ce162..5596c6a2881e 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -63,9 +63,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
63 * This again causes shrink_dcache_for_umount_subtree() to 63 * This again causes shrink_dcache_for_umount_subtree() to
64 * Oops, since the test for IS_ROOT() will fail. 64 * Oops, since the test for IS_ROOT() will fail.
65 */ 65 */
66 spin_lock(&dcache_lock); 66 spin_lock(&sb->s_root->d_inode->i_lock);
67 spin_lock(&sb->s_root->d_lock);
67 list_del_init(&sb->s_root->d_alias); 68 list_del_init(&sb->s_root->d_alias);
68 spin_unlock(&dcache_lock); 69 spin_unlock(&sb->s_root->d_lock);
70 spin_unlock(&sb->s_root->d_inode->i_lock);
69 } 71 }
70 return 0; 72 return 0;
71} 73}
@@ -119,7 +121,7 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
119 security_d_instantiate(ret, inode); 121 security_d_instantiate(ret, inode);
120 122
121 if (ret->d_op == NULL) 123 if (ret->d_op == NULL)
122 ret->d_op = server->nfs_client->rpc_ops->dentry_ops; 124 d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
123out: 125out:
124 nfs_free_fattr(fsinfo.fattr); 126 nfs_free_fattr(fsinfo.fattr);
125 return ret; 127 return ret;
@@ -226,7 +228,7 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
226 security_d_instantiate(ret, inode); 228 security_d_instantiate(ret, inode);
227 229
228 if (ret->d_op == NULL) 230 if (ret->d_op == NULL)
229 ret->d_op = server->nfs_client->rpc_ops->dentry_ops; 231 d_set_d_op(ret, server->nfs_client->rpc_ops->dentry_ops);
230 232
231out: 233out:
232 nfs_free_fattr(fattr); 234 nfs_free_fattr(fattr);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 314f57164602..017daa3bed38 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
289 } else if (S_ISDIR(inode->i_mode)) { 289 } else if (S_ISDIR(inode->i_mode)) {
290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
291 inode->i_fop = &nfs_dir_operations; 291 inode->i_fop = &nfs_dir_operations;
292 inode->i_data.a_ops = &nfs_dir_aops;
292 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) 293 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
293 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 294 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
294 /* Deal with crossing mountpoints */ 295 /* Deal with crossing mountpoints */
@@ -1437,11 +1438,18 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
1437 return &nfsi->vfs_inode; 1438 return &nfsi->vfs_inode;
1438} 1439}
1439 1440
1440void nfs_destroy_inode(struct inode *inode) 1441static void nfs_i_callback(struct rcu_head *head)
1441{ 1442{
1443 struct inode *inode = container_of(head, struct inode, i_rcu);
1444 INIT_LIST_HEAD(&inode->i_dentry);
1442 kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); 1445 kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
1443} 1446}
1444 1447
1448void nfs_destroy_inode(struct inode *inode)
1449{
1450 call_rcu(&inode->i_rcu, nfs_i_callback);
1451}
1452
1445static inline void nfs4_init_once(struct nfs_inode *nfsi) 1453static inline void nfs4_init_once(struct nfs_inode *nfsi)
1446{ 1454{
1447#ifdef CONFIG_NFS_V4 1455#ifdef CONFIG_NFS_V4
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index db08ff3ff454..e6356b750b77 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -362,6 +362,15 @@ unsigned int nfs_page_length(struct page *page)
362} 362}
363 363
364/* 364/*
365 * Convert a umode to a dirent->d_type
366 */
367static inline
368unsigned char nfs_umode_to_dtype(umode_t mode)
369{
370 return (mode >> 12) & 15;
371}
372
373/*
365 * Determine the number of pages in an array of length 'len' and 374 * Determine the number of pages in an array of length 'len' and
366 * with a base offset of 'base' 375 * with a base offset of 'base'
367 */ 376 */
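/*
 * Illustrative aside, not from the patch: nfs_umode_to_dtype() above
 * works because the S_IFMT nibble of the mode uses the same encoding as
 * the DT_* dirent types.  Two worked values:
 *
 *	S_IFREG = 0100000  ->  0100000 >> 12 = 010 (octal) = 8 = DT_REG
 *	S_IFDIR = 0040000  ->  0040000 >> 12 = 004 (octal) = 4 = DT_DIR
 */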
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index eceafe74f473..4f981f1f6689 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -505,13 +505,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
505 505
506static struct rpc_version mnt_version1 = { 506static struct rpc_version mnt_version1 = {
507 .number = 1, 507 .number = 1,
508 .nrprocs = 2, 508 .nrprocs = ARRAY_SIZE(mnt_procedures),
509 .procs = mnt_procedures, 509 .procs = mnt_procedures,
510}; 510};
511 511
512static struct rpc_version mnt_version3 = { 512static struct rpc_version mnt_version3 = {
513 .number = 3, 513 .number = 3,
514 .nrprocs = 2, 514 .nrprocs = ARRAY_SIZE(mnt3_procedures),
515 .procs = mnt3_procedures, 515 .procs = mnt3_procedures,
516}; 516};
517 517
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index db6aa3673cf3..74aaf3963c10 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -49,12 +49,17 @@ char *nfs_path(const char *base,
49 const struct dentry *dentry, 49 const struct dentry *dentry,
50 char *buffer, ssize_t buflen) 50 char *buffer, ssize_t buflen)
51{ 51{
52 char *end = buffer+buflen; 52 char *end;
53 int namelen; 53 int namelen;
54 unsigned seq;
54 55
56rename_retry:
57 end = buffer+buflen;
55 *--end = '\0'; 58 *--end = '\0';
56 buflen--; 59 buflen--;
57 spin_lock(&dcache_lock); 60
61 seq = read_seqbegin(&rename_lock);
62 rcu_read_lock();
58 while (!IS_ROOT(dentry) && dentry != droot) { 63 while (!IS_ROOT(dentry) && dentry != droot) {
59 namelen = dentry->d_name.len; 64 namelen = dentry->d_name.len;
60 buflen -= namelen + 1; 65 buflen -= namelen + 1;
@@ -65,7 +70,9 @@ char *nfs_path(const char *base,
65 *--end = '/'; 70 *--end = '/';
66 dentry = dentry->d_parent; 71 dentry = dentry->d_parent;
67 } 72 }
68 spin_unlock(&dcache_lock); 73 rcu_read_unlock();
74 if (read_seqretry(&rename_lock, seq))
75 goto rename_retry;
69 if (*end != '/') { 76 if (*end != '/') {
70 if (--buflen < 0) 77 if (--buflen < 0)
71 goto Elong; 78 goto Elong;
@@ -82,7 +89,9 @@ char *nfs_path(const char *base,
82 memcpy(end, base, namelen); 89 memcpy(end, base, namelen);
83 return end; 90 return end;
84Elong_unlock: 91Elong_unlock:
85 spin_unlock(&dcache_lock); 92 rcu_read_unlock();
93 if (read_seqretry(&rename_lock, seq))
94 goto rename_retry;
86Elong: 95Elong:
87 return ERR_PTR(-ENAMETOOLONG); 96 return ERR_PTR(-ENAMETOOLONG);
88} 97}
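The nfs_path() rework above drops dcache_lock in favour of an RCU walk validated against rename_lock, restarting from the top whenever a rename raced with the walk. A minimal userspace analogue of that seqcount read/retry shape, using C11 atomics (single writer assumed; read_begin/read_retry/write_update are invented names for illustration, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint seq;          /* even = stable, odd = write in progress */
static int value_a, value_b;     /* data protected by the sequence counter */

static unsigned read_begin(void)
{
        unsigned s;

        while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
                ;                /* writer active: wait for an even count */
        return s;
}

static int read_retry(unsigned start)
{
        atomic_thread_fence(memory_order_acquire);
        return atomic_load_explicit(&seq, memory_order_relaxed) != start;
}

static void write_update(int a, int b)
{
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* odd */
        value_a = a;
        value_b = b;
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even */
}

int main(void)
{
        unsigned s;
        int a, b;

        write_update(1, 2);
        do {                      /* same shape as the rename_retry loop */
                s = read_begin();
                a = value_a;
                b = value_b;
        } while (read_retry(s));
        printf("read %d %d\n", a, b);
        return 0;
}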
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index e6bf45710cc7..5914a1911c95 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -423,7 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
423 struct page **page; 423 struct page **page;
424 size_t hdrlen; 424 size_t hdrlen;
425 unsigned int pglen, recvd; 425 unsigned int pglen, recvd;
426 int status, nr = 0; 426 int status;
427 427
428 if ((status = ntohl(*p++))) 428 if ((status = ntohl(*p++)))
429 return nfs_stat_to_errno(status); 429 return nfs_stat_to_errno(status);
@@ -443,7 +443,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
443 if (pglen > recvd) 443 if (pglen > recvd)
444 pglen = recvd; 444 pglen = recvd;
445 page = rcvbuf->pages; 445 page = rcvbuf->pages;
446 return nr; 446 return pglen;
447} 447}
448 448
449static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 449static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -485,6 +485,8 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
485 entry->prev_cookie = entry->cookie; 485 entry->prev_cookie = entry->cookie;
486 entry->cookie = ntohl(*p++); 486 entry->cookie = ntohl(*p++);
487 487
488 entry->d_type = DT_UNKNOWN;
489
488 p = xdr_inline_peek(xdr, 8); 490 p = xdr_inline_peek(xdr, 8);
489 if (p != NULL) 491 if (p != NULL)
490 entry->eof = !p[0] && p[1]; 492 entry->eof = !p[0] && p[1];
@@ -495,7 +497,7 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
495 497
496out_overflow: 498out_overflow:
497 print_overflow_msg(__func__, xdr); 499 print_overflow_msg(__func__, xdr);
498 return ERR_PTR(-EIO); 500 return ERR_PTR(-EAGAIN);
499} 501}
500 502
501/* 503/*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9a5e832c257..f6cc60f06dac 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -555,7 +555,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
555 struct page **page; 555 struct page **page;
556 size_t hdrlen; 556 size_t hdrlen;
557 u32 recvd, pglen; 557 u32 recvd, pglen;
558 int status, nr = 0; 558 int status;
559 559
560 status = ntohl(*p++); 560 status = ntohl(*p++);
561 /* Decode post_op_attrs */ 561 /* Decode post_op_attrs */
@@ -586,7 +586,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
586 pglen = recvd; 586 pglen = recvd;
587 page = rcvbuf->pages; 587 page = rcvbuf->pages;
588 588
589 return nr; 589 return pglen;
590} 590}
591 591
592__be32 * 592__be32 *
@@ -622,11 +622,13 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
622 entry->prev_cookie = entry->cookie; 622 entry->prev_cookie = entry->cookie;
623 p = xdr_decode_hyper(p, &entry->cookie); 623 p = xdr_decode_hyper(p, &entry->cookie);
624 624
625 entry->d_type = DT_UNKNOWN;
625 if (plus) { 626 if (plus) {
626 entry->fattr->valid = 0; 627 entry->fattr->valid = 0;
627 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); 628 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
628 if (IS_ERR(p)) 629 if (IS_ERR(p))
629 goto out_overflow_exit; 630 goto out_overflow_exit;
631 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
630 /* In fact, a post_op_fh3: */ 632 /* In fact, a post_op_fh3: */
631 p = xdr_inline_decode(xdr, 4); 633 p = xdr_inline_decode(xdr, 4);
632 if (unlikely(!p)) 634 if (unlikely(!p))
@@ -656,7 +658,7 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
656out_overflow: 658out_overflow:
657 print_overflow_msg(__func__, xdr); 659 print_overflow_msg(__func__, xdr);
658out_overflow_exit: 660out_overflow_exit:
659 return ERR_PTR(-EIO); 661 return ERR_PTR(-EAGAIN);
660} 662}
661 663
662/* 664/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0f24cdf2cb13..4435e5e1f904 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2852,8 +2852,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2852 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 2852 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
2853 res.pgbase = args.pgbase; 2853 res.pgbase = args.pgbase;
2854 status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); 2854 status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
2855 if (status == 0) 2855 if (status >= 0) {
2856 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 2856 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
2857 status += args.pgbase;
2858 }
2857 2859
2858 nfs_invalidate_atime(dir); 2860 nfs_invalidate_atime(dir);
2859 2861
@@ -3359,6 +3361,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
3359 ret = nfs_revalidate_inode(server, inode); 3361 ret = nfs_revalidate_inode(server, inode);
3360 if (ret < 0) 3362 if (ret < 0)
3361 return ret; 3363 return ret;
3364 if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
3365 nfs_zap_acl_cache(inode);
3362 ret = nfs4_read_cached_acl(inode, buf, buflen); 3366 ret = nfs4_read_cached_acl(inode, buf, buflen);
3363 if (ret != -ENOENT) 3367 if (ret != -ENOENT)
3364 return ret; 3368 return ret;
@@ -3387,6 +3391,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3387 nfs_inode_return_delegation(inode); 3391 nfs_inode_return_delegation(inode);
3388 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 3392 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3389 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3393 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3394 /*
3395 * Acl update can result in inode attribute update.
3396 * so mark the attribute cache invalid.
3397 */
3398 spin_lock(&inode->i_lock);
3399 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
3400 spin_unlock(&inode->i_lock);
3390 nfs_access_zap_cache(inode); 3401 nfs_access_zap_cache(inode);
3391 nfs_zap_acl_cache(inode); 3402 nfs_zap_acl_cache(inode);
3392 return ret; 3403 return ret;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f313c4cce7e4..9f1826b012e6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4518,7 +4518,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
4518 xdr_read_pages(xdr, pglen); 4518 xdr_read_pages(xdr, pglen);
4519 4519
4520 4520
4521 return 0; 4521 return pglen;
4522} 4522}
4523 4523
4524static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) 4524static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -6208,6 +6208,10 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) 6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
6209 entry->ino = entry->fattr->fileid; 6209 entry->ino = entry->fattr->fileid;
6210 6210
6211 entry->d_type = DT_UNKNOWN;
6212 if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
6213 entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
6214
6211 if (verify_attr_len(xdr, p, len) < 0) 6215 if (verify_attr_len(xdr, p, len) < 0)
6212 goto out_overflow; 6216 goto out_overflow;
6213 6217
@@ -6221,7 +6225,7 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6221 6225
6222out_overflow: 6226out_overflow:
6223 print_overflow_msg(__func__, xdr); 6227 print_overflow_msg(__func__, xdr);
6224 return ERR_PTR(-EIO); 6228 return ERR_PTR(-EAGAIN);
6225} 6229}
6226 6230
6227/* 6231/*
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 137b549e63db..b68536cc9046 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -115,7 +115,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
115{ 115{
116 if (!nfs_lock_request_dontget(req)) 116 if (!nfs_lock_request_dontget(req))
117 return 0; 117 return 0;
118 if (req->wb_page != NULL) 118 if (test_bit(PG_MAPPED, &req->wb_flags))
119 radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); 119 radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
120 return 1; 120 return 1;
121} 121}
@@ -125,7 +125,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
125 */ 125 */
126void nfs_clear_page_tag_locked(struct nfs_page *req) 126void nfs_clear_page_tag_locked(struct nfs_page *req)
127{ 127{
128 if (req->wb_page != NULL) { 128 if (test_bit(PG_MAPPED, &req->wb_flags)) {
129 struct inode *inode = req->wb_context->path.dentry->d_inode; 129 struct inode *inode = req->wb_context->path.dentry->d_inode;
130 struct nfs_inode *nfsi = NFS_I(inode); 130 struct nfs_inode *nfsi = NFS_I(inode);
131 131
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e4b62c6f5a6e..aedcaa7f291f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req)
152 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 152 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
153 req->wb_bytes, 153 req->wb_bytes,
154 (long long)req_offset(req)); 154 (long long)req_offset(req));
155 nfs_clear_request(req);
156 nfs_release_request(req); 155 nfs_release_request(req);
157} 156}
158 157
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0a42e8f4adcb..4100630c9a5b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -39,7 +39,6 @@
39#include <linux/nfs_mount.h> 39#include <linux/nfs_mount.h>
40#include <linux/nfs4_mount.h> 40#include <linux/nfs4_mount.h>
41#include <linux/lockd/bind.h> 41#include <linux/lockd/bind.h>
42#include <linux/smp_lock.h>
43#include <linux/seq_file.h> 42#include <linux/seq_file.h>
44#include <linux/mount.h> 43#include <linux/mount.h>
45#include <linux/mnt_namespace.h> 44#include <linux/mnt_namespace.h>
@@ -67,6 +66,12 @@
67 66
68#define NFSDBG_FACILITY NFSDBG_VFS 67#define NFSDBG_FACILITY NFSDBG_VFS
69 68
69#ifdef CONFIG_NFS_V3
70#define NFS_DEFAULT_VERSION 3
71#else
72#define NFS_DEFAULT_VERSION 2
73#endif
74
70enum { 75enum {
71 /* Mount options that take no arguments */ 76 /* Mount options that take no arguments */
72 Opt_soft, Opt_hard, 77 Opt_soft, Opt_hard,
@@ -1064,12 +1069,10 @@ static int nfs_parse_mount_options(char *raw,
1064 mnt->flags |= NFS_MOUNT_VER3; 1069 mnt->flags |= NFS_MOUNT_VER3;
1065 mnt->version = 3; 1070 mnt->version = 3;
1066 break; 1071 break;
1067#ifdef CONFIG_NFS_V4
1068 case Opt_v4: 1072 case Opt_v4:
1069 mnt->flags &= ~NFS_MOUNT_VER3; 1073 mnt->flags &= ~NFS_MOUNT_VER3;
1070 mnt->version = 4; 1074 mnt->version = 4;
1071 break; 1075 break;
1072#endif
1073 case Opt_udp: 1076 case Opt_udp:
1074 mnt->flags &= ~NFS_MOUNT_TCP; 1077 mnt->flags &= ~NFS_MOUNT_TCP;
1075 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1078 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1281,12 +1284,10 @@ static int nfs_parse_mount_options(char *raw,
1281 mnt->flags |= NFS_MOUNT_VER3; 1284 mnt->flags |= NFS_MOUNT_VER3;
1282 mnt->version = 3; 1285 mnt->version = 3;
1283 break; 1286 break;
1284#ifdef CONFIG_NFS_V4
1285 case NFS4_VERSION: 1287 case NFS4_VERSION:
1286 mnt->flags &= ~NFS_MOUNT_VER3; 1288 mnt->flags &= ~NFS_MOUNT_VER3;
1287 mnt->version = 4; 1289 mnt->version = 4;
1288 break; 1290 break;
1289#endif
1290 default: 1291 default:
1291 goto out_invalid_value; 1292 goto out_invalid_value;
1292 } 1293 }
@@ -2277,7 +2278,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2277 }; 2278 };
2278 int error = -ENOMEM; 2279 int error = -ENOMEM;
2279 2280
2280 data = nfs_alloc_parsed_mount_data(3); 2281 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2281 mntfh = nfs_alloc_fhandle(); 2282 mntfh = nfs_alloc_fhandle();
2282 if (data == NULL || mntfh == NULL) 2283 if (data == NULL || mntfh == NULL)
2283 goto out_free_fh; 2284 goto out_free_fh;
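The super.c changes above make the mount default follow the build configuration: version 3 when NFS v3 support is compiled in, otherwise version 2, with the v4 mount option no longer hidden behind CONFIG_NFS_V4. The same compile-time default selection, as a trivial standalone sketch (HAVE_V3 is a made-up macro standing in for the config option):

#include <stdio.h>

#ifdef HAVE_V3                  /* stand-in for CONFIG_NFS_V3 */
#define DEFAULT_VERSION 3
#else
#define DEFAULT_VERSION 2
#endif

int main(void)
{
        /* Build with -DHAVE_V3 to get 3, without it to get 2. */
        printf("default protocol version: %d\n", DEFAULT_VERSION);
        return 0;
}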
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 7bdec8531400..8fe9eb47a97f 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -496,7 +496,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
496 496
497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", 497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
498 dentry->d_parent->d_name.name, dentry->d_name.name, 498 dentry->d_parent->d_name.name, dentry->d_name.name,
499 atomic_read(&dentry->d_count)); 499 dentry->d_count);
500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME); 500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
501 501
502 /* 502 /*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4c14c17a5276..10d648ea128b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -390,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
390 if (nfs_have_delegation(inode, FMODE_WRITE)) 390 if (nfs_have_delegation(inode, FMODE_WRITE))
391 nfsi->change_attr++; 391 nfsi->change_attr++;
392 } 392 }
393 set_bit(PG_MAPPED, &req->wb_flags);
393 SetPagePrivate(req->wb_page); 394 SetPagePrivate(req->wb_page);
394 set_page_private(req->wb_page, (unsigned long)req); 395 set_page_private(req->wb_page, (unsigned long)req);
395 nfsi->npages++; 396 nfsi->npages++;
@@ -415,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
415 spin_lock(&inode->i_lock); 416 spin_lock(&inode->i_lock);
416 set_page_private(req->wb_page, 0); 417 set_page_private(req->wb_page, 0);
417 ClearPagePrivate(req->wb_page); 418 ClearPagePrivate(req->wb_page);
419 clear_bit(PG_MAPPED, &req->wb_flags);
418 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
419 nfsi->npages--; 421 nfsi->npages--;
420 if (!nfsi->npages) { 422 if (!nfsi->npages) {
@@ -422,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
422 iput(inode); 424 iput(inode);
423 } else 425 } else
424 spin_unlock(&inode->i_lock); 426 spin_unlock(&inode->i_lock);
425 nfs_clear_request(req);
426 nfs_release_request(req); 427 nfs_release_request(req);
427} 428}
428 429
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2a533a0af2a9..7e84a852cdae 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -260,9 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp)
260 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, 260 err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
261 &fhp->fh_post_attr); 261 &fhp->fh_post_attr);
262 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; 262 fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
263 if (err) 263 if (err) {
264 fhp->fh_post_saved = 0; 264 fhp->fh_post_saved = 0;
265 else 265 /* Grab the ctime anyway - set_change_info might use it */
266 fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
267 } else
266 fhp->fh_post_saved = 1; 268 fhp->fh_post_saved = 1;
267} 269}
268 270
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f1e5ec6b5105..fbd18c3074bb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
673 spin_unlock(&clp->cl_lock); 673 spin_unlock(&clp->cl_lock);
674} 674}
675 675
676static void nfsd4_register_conn(struct nfsd4_conn *conn) 676static int nfsd4_register_conn(struct nfsd4_conn *conn)
677{ 677{
678 conn->cn_xpt_user.callback = nfsd4_conn_lost; 678 conn->cn_xpt_user.callback = nfsd4_conn_lost;
679 register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); 679 return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
680} 680}
681 681
682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) 682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
683{ 683{
684 struct nfsd4_conn *conn; 684 struct nfsd4_conn *conn;
685 u32 flags = NFS4_CDFC4_FORE; 685 u32 flags = NFS4_CDFC4_FORE;
686 int ret;
686 687
687 if (ses->se_flags & SESSION4_BACK_CHAN) 688 if (ses->se_flags & SESSION4_BACK_CHAN)
688 flags |= NFS4_CDFC4_BACK; 689 flags |= NFS4_CDFC4_BACK;
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
690 if (!conn) 691 if (!conn)
691 return nfserr_jukebox; 692 return nfserr_jukebox;
692 nfsd4_hash_conn(conn, ses); 693 nfsd4_hash_conn(conn, ses);
693 nfsd4_register_conn(conn); 694 ret = nfsd4_register_conn(conn);
695 if (ret)
696 /* oops; xprt is already down: */
697 nfsd4_conn_lost(&conn->cn_xpt_user);
694 return nfs_ok; 698 return nfs_ok;
695} 699}
696 700
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
1644{ 1648{
1645 struct nfs4_client *clp = ses->se_client; 1649 struct nfs4_client *clp = ses->se_client;
1646 struct nfsd4_conn *c; 1650 struct nfsd4_conn *c;
1651 int ret;
1647 1652
1648 spin_lock(&clp->cl_lock); 1653 spin_lock(&clp->cl_lock);
1649 c = __nfsd4_find_conn(new->cn_xprt, ses); 1654 c = __nfsd4_find_conn(new->cn_xprt, ses);
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
1654 } 1659 }
1655 __nfsd4_hash_conn(new, ses); 1660 __nfsd4_hash_conn(new, ses);
1656 spin_unlock(&clp->cl_lock); 1661 spin_unlock(&clp->cl_lock);
1657 nfsd4_register_conn(new); 1662 ret = nfsd4_register_conn(new);
1663 if (ret)
1664 /* oops; xprt is already down: */
1665 nfsd4_conn_lost(&new->cn_xpt_user);
1658 return; 1666 return;
1659} 1667}
1660 1668
@@ -2254,7 +2262,7 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
2254 * Spawn a thread to perform a recall on the delegation represented 2262 * Spawn a thread to perform a recall on the delegation represented
2255 * by the lease (file_lock) 2263 * by the lease (file_lock)
2256 * 2264 *
2257 * Called from break_lease() with lock_kernel() held. 2265 * Called from break_lease() with lock_flocks() held.
2258 * Note: we assume break_lease will only call this *once* for any given 2266 * Note: we assume break_lease will only call this *once* for any given
2259 * lease. 2267 * lease.
2260 */ 2268 */
@@ -2278,7 +2286,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2278 list_add_tail(&dp->dl_recall_lru, &del_recall_lru); 2286 list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
2279 spin_unlock(&recall_lock); 2287 spin_unlock(&recall_lock);
2280 2288
2281 /* only place dl_time is set. protected by lock_kernel*/ 2289 /* only place dl_time is set. protected by lock_flocks*/
2282 dp->dl_time = get_seconds(); 2290 dp->dl_time = get_seconds();
2283 2291
2284 /* 2292 /*
@@ -2295,7 +2303,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
2295/* 2303/*
2296 * The file_lock is being reaped. 2304
2297 * 2305 *
2298 * Called by locks_free_lock() with lock_kernel() held. 2306 * Called by locks_free_lock() with lock_flocks() held.
2299 */ 2307 */
2300static 2308static
2301void nfsd_release_deleg_cb(struct file_lock *fl) 2309void nfsd_release_deleg_cb(struct file_lock *fl)
@@ -2310,7 +2318,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
2310} 2318}
2311 2319
2312/* 2320/*
2313 * Called from setlease() with lock_kernel() held 2321 * Called from setlease() with lock_flocks() held
2314 */ 2322 */
2315static 2323static
2316int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) 2324int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
@@ -4328,7 +4336,7 @@ __nfs4_state_shutdown(void)
4328void 4336void
4329nfs4_state_shutdown(void) 4337nfs4_state_shutdown(void)
4330{ 4338{
4331 cancel_rearming_delayed_workqueue(laundry_wq, &laundromat_work); 4339 cancel_delayed_work_sync(&laundromat_work);
4332 destroy_workqueue(laundry_wq); 4340 destroy_workqueue(laundry_wq);
4333 locks_end_grace(&nfsd4_manager); 4341 locks_end_grace(&nfsd4_manager);
4334 nfs4_lock_state(); 4342 nfs4_lock_state();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 184938fcff04..3a359023c9f7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1756,8 +1756,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1756 goto out_dput_new; 1756 goto out_dput_new;
1757 1757
1758 if (svc_msnfs(ffhp) && 1758 if (svc_msnfs(ffhp) &&
1759 ((atomic_read(&odentry->d_count) > 1) 1759 ((odentry->d_count > 1) || (ndentry->d_count > 1))) {
1760 || (atomic_read(&ndentry->d_count) > 1))) {
1761 host_err = -EPERM; 1760 host_err = -EPERM;
1762 goto out_dput_new; 1761 goto out_dput_new;
1763 } 1762 }
@@ -1843,7 +1842,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1843 if (type != S_IFDIR) { /* It's UNLINK */ 1842 if (type != S_IFDIR) { /* It's UNLINK */
1844#ifdef MSNFS 1843#ifdef MSNFS
1845 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && 1844 if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
1846 (atomic_read(&rdentry->d_count) > 1)) { 1845 (rdentry->d_count > 1)) {
1847 host_err = -EPERM; 1846 host_err = -EPERM;
1848 } else 1847 } else
1849#endif 1848#endif
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 4d476ff08ae6..60fce3dc5cb5 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -484,18 +484,17 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
484static inline void 484static inline void
485set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) 485set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
486{ 486{
487 BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); 487 BUG_ON(!fhp->fh_pre_saved);
488 cinfo->atomic = 1; 488 cinfo->atomic = fhp->fh_post_saved;
489 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode); 489 cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
490 if (cinfo->change_supported) { 490
491 cinfo->before_change = fhp->fh_pre_change; 491 cinfo->before_change = fhp->fh_pre_change;
492 cinfo->after_change = fhp->fh_post_change; 492 cinfo->after_change = fhp->fh_post_change;
493 } else { 493 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
494 cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; 494 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
495 cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; 495 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
496 cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec; 496 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
497 cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec; 497
498 }
499} 498}
500 499
501int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); 500int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 49c844dab33a..59e5fe742f7b 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -335,7 +335,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
335 * the device at this point. 335 * the device at this point.
336 * 336 *
337 * To prevent nilfs_dat_translate() from returning the 337 * To prevent nilfs_dat_translate() from returning the
338 * uncommited block number, this makes a copy of the entry 338 * uncommitted block number, this makes a copy of the entry
339 * buffer and redirects nilfs_dat_translate() to the copy. 339 * buffer and redirects nilfs_dat_translate() to the copy.
340 */ 340 */
341 if (!buffer_nilfs_redirected(entry_bh)) { 341 if (!buffer_nilfs_redirected(entry_bh)) {
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 33ad25ddd5c4..caf9a6a3fb54 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -176,7 +176,6 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
176int nilfs_init_gcinode(struct inode *inode) 176int nilfs_init_gcinode(struct inode *inode)
177{ 177{
178 struct nilfs_inode_info *ii = NILFS_I(inode); 178 struct nilfs_inode_info *ii = NILFS_I(inode);
179 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
180 179
181 inode->i_mode = S_IFREG; 180 inode->i_mode = S_IFREG;
182 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 181 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
@@ -186,14 +185,6 @@ int nilfs_init_gcinode(struct inode *inode)
186 ii->i_flags = 0; 185 ii->i_flags = 0;
187 nilfs_bmap_init_gc(ii->i_bmap); 186 nilfs_bmap_init_gc(ii->i_bmap);
188 187
189 /*
190 * Add the inode to GC inode list. Garbage Collection
191 * is serialized and no two processes manipulate the
192 * list simultaneously.
193 */
194 igrab(inode);
195 list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
196
197 return 0; 188 return 0;
198} 189}
199 190
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 71d4bc8464e0..77b48c8fab17 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -785,15 +785,19 @@ out_err:
785 return err; 785 return err;
786} 786}
787 787
788int nilfs_permission(struct inode *inode, int mask) 788int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
789{ 789{
790 struct nilfs_root *root = NILFS_I(inode)->i_root; 790 struct nilfs_root *root;
791
792 if (flags & IPERM_FLAG_RCU)
793 return -ECHILD;
791 794
795 root = NILFS_I(inode)->i_root;
792 if ((mask & MAY_WRITE) && root && 796 if ((mask & MAY_WRITE) && root &&
793 root->cno != NILFS_CPTREE_CURRENT_CNO) 797 root->cno != NILFS_CPTREE_CURRENT_CNO)
794 return -EROFS; /* snapshot is not writable */ 798 return -EROFS; /* snapshot is not writable */
795 799
796 return generic_permission(inode, mask, NULL); 800 return generic_permission(inode, mask, flags, NULL);
797} 801}
798 802
799int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, 803int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode,
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 3e90f86d5bfe..b185e937a335 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -337,6 +337,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
337 struct nilfs_argv *argv, void *buf) 337 struct nilfs_argv *argv, void *buf)
338{ 338{
339 size_t nmembs = argv->v_nmembs; 339 size_t nmembs = argv->v_nmembs;
340 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
340 struct inode *inode; 341 struct inode *inode;
341 struct nilfs_vdesc *vdesc; 342 struct nilfs_vdesc *vdesc;
342 struct buffer_head *bh, *n; 343 struct buffer_head *bh, *n;
@@ -349,10 +350,21 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
349 ino = vdesc->vd_ino; 350 ino = vdesc->vd_ino;
350 cno = vdesc->vd_cno; 351 cno = vdesc->vd_cno;
351 inode = nilfs_iget_for_gc(sb, ino, cno); 352 inode = nilfs_iget_for_gc(sb, ino, cno);
352 if (unlikely(inode == NULL)) { 353 if (IS_ERR(inode)) {
353 ret = -ENOMEM; 354 ret = PTR_ERR(inode);
354 goto failed; 355 goto failed;
355 } 356 }
357 if (list_empty(&NILFS_I(inode)->i_dirty)) {
358 /*
359 * Add the inode to GC inode list. Garbage Collection
360 * is serialized and no two processes manipulate the
361 * list simultaneously.
362 */
363 igrab(inode);
364 list_add(&NILFS_I(inode)->i_dirty,
365 &nilfs->ns_gc_inodes);
366 }
367
356 do { 368 do {
357 ret = nilfs_ioctl_move_inode_block(inode, vdesc, 369 ret = nilfs_ioctl_move_inode_block(inode, vdesc,
358 &buffers); 370 &buffers);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f7560da5a567..0ca98823db59 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -256,7 +256,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);
256extern void nilfs_truncate(struct inode *); 256extern void nilfs_truncate(struct inode *);
257extern void nilfs_evict_inode(struct inode *); 257extern void nilfs_evict_inode(struct inode *);
258extern int nilfs_setattr(struct dentry *, struct iattr *); 258extern int nilfs_setattr(struct dentry *, struct iattr *);
259int nilfs_permission(struct inode *inode, int mask); 259int nilfs_permission(struct inode *inode, int mask, unsigned int flags);
260extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, 260extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *,
261 struct buffer_head **); 261 struct buffer_head **);
262extern int nilfs_inode_dirty(struct inode *); 262extern int nilfs_inode_dirty(struct inode *);
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index f804d41ec9d3..e2dcc9c733f7 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -162,10 +162,13 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
162 return &ii->vfs_inode; 162 return &ii->vfs_inode;
163} 163}
164 164
165void nilfs_destroy_inode(struct inode *inode) 165static void nilfs_i_callback(struct rcu_head *head)
166{ 166{
167 struct inode *inode = container_of(head, struct inode, i_rcu);
167 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 168 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
168 169
170 INIT_LIST_HEAD(&inode->i_dentry);
171
169 if (mdi) { 172 if (mdi) {
170 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 173 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
171 kfree(mdi); 174 kfree(mdi);
@@ -173,6 +176,11 @@ void nilfs_destroy_inode(struct inode *inode)
173 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); 176 kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
174} 177}
175 178
179void nilfs_destroy_inode(struct inode *inode)
180{
181 call_rcu(&inode->i_rcu, nilfs_i_callback);
182}
183
176static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) 184static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
177{ 185{
178 struct the_nilfs *nilfs = sbi->s_nilfs; 186 struct the_nilfs *nilfs = sbi->s_nilfs;
@@ -838,7 +846,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
838 846
839static int nilfs_tree_was_touched(struct dentry *root_dentry) 847static int nilfs_tree_was_touched(struct dentry *root_dentry)
840{ 848{
841 return atomic_read(&root_dentry->d_count) > 1; 849 return root_dentry->d_count > 1;
842} 850}
843 851
844/** 852/**
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b04f88eed09e..f35794b97e8e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
92 92
93 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 93 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
94 94
95 wait_event(group->fanotify_data.access_waitq, event->response); 95 wait_event(group->fanotify_data.access_waitq, event->response ||
96 atomic_read(&group->fanotify_data.bypass_perm));
97
98 if (!event->response) /* bypass_perm set */
99 return 0;
96 100
97 /* userspace responded, convert to something usable */ 101 /* userspace responded, convert to something usable */
98 spin_lock(&event->lock); 102 spin_lock(&event->lock);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 063224812b7e..8b61220cffc5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -106,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
106 return client_fd; 106 return client_fd;
107} 107}
108 108
109static ssize_t fill_event_metadata(struct fsnotify_group *group, 109static int fill_event_metadata(struct fsnotify_group *group,
110 struct fanotify_event_metadata *metadata, 110 struct fanotify_event_metadata *metadata,
111 struct fsnotify_event *event) 111 struct fsnotify_event *event)
112{ 112{
113 int ret = 0;
114
113 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 115 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
114 group, metadata, event); 116 group, metadata, event);
115 117
116 metadata->event_len = FAN_EVENT_METADATA_LEN; 118 metadata->event_len = FAN_EVENT_METADATA_LEN;
119 metadata->metadata_len = FAN_EVENT_METADATA_LEN;
117 metadata->vers = FANOTIFY_METADATA_VERSION; 120 metadata->vers = FANOTIFY_METADATA_VERSION;
118 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; 121 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
119 metadata->pid = pid_vnr(event->tgid); 122 metadata->pid = pid_vnr(event->tgid);
120 metadata->fd = create_fd(group, event); 123 if (unlikely(event->mask & FAN_Q_OVERFLOW))
124 metadata->fd = FAN_NOFD;
125 else {
126 metadata->fd = create_fd(group, event);
127 if (metadata->fd < 0)
128 ret = metadata->fd;
129 }
121 130
122 return metadata->fd; 131 return ret;
123} 132}
124 133
125#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 134#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -200,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
200 209
201 mutex_lock(&group->fanotify_data.access_mutex); 210 mutex_lock(&group->fanotify_data.access_mutex);
202 211
203 if (group->fanotify_data.bypass_perm) { 212 if (atomic_read(&group->fanotify_data.bypass_perm)) {
204 mutex_unlock(&group->fanotify_data.access_mutex); 213 mutex_unlock(&group->fanotify_data.access_mutex);
205 kmem_cache_free(fanotify_response_event_cache, re); 214 kmem_cache_free(fanotify_response_event_cache, re);
206 event->response = FAN_ALLOW; 215 event->response = FAN_ALLOW;
@@ -257,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
257 266
258 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 267 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
259 268
260 fd = fill_event_metadata(group, &fanotify_event_metadata, event); 269 ret = fill_event_metadata(group, &fanotify_event_metadata, event);
261 if (fd < 0) 270 if (ret < 0)
262 return fd; 271 goto out;
263 272
273 fd = fanotify_event_metadata.fd;
264 ret = prepare_for_access_response(group, event, fd); 274 ret = prepare_for_access_response(group, event, fd);
265 if (ret) 275 if (ret)
266 goto out_close_fd; 276 goto out_close_fd;
267 277
268 ret = -EFAULT; 278 ret = -EFAULT;
269 if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN)) 279 if (copy_to_user(buf, &fanotify_event_metadata,
280 fanotify_event_metadata.event_len))
270 goto out_kill_access_response; 281 goto out_kill_access_response;
271 282
272 return FAN_EVENT_METADATA_LEN; 283 return fanotify_event_metadata.event_len;
273 284
274out_kill_access_response: 285out_kill_access_response:
275 remove_access_response(group, event, fd); 286 remove_access_response(group, event, fd);
276out_close_fd: 287out_close_fd:
277 sys_close(fd); 288 if (fd != FAN_NOFD)
289 sys_close(fd);
290out:
291#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
292 if (event->mask & FAN_ALL_PERM_EVENTS) {
293 event->response = FAN_DENY;
294 wake_up(&group->fanotify_data.access_waitq);
295 }
296#endif
278 return ret; 297 return ret;
279} 298}
280 299
@@ -382,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
382 401
383 mutex_lock(&group->fanotify_data.access_mutex); 402 mutex_lock(&group->fanotify_data.access_mutex);
384 403
385 group->fanotify_data.bypass_perm = true; 404 atomic_inc(&group->fanotify_data.bypass_perm);
386 405
387 list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { 406 list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
388 pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, 407 pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -586,11 +605,10 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
586{ 605{
587 struct fsnotify_mark *fsn_mark; 606 struct fsnotify_mark *fsn_mark;
588 __u32 added; 607 __u32 added;
608 int ret = 0;
589 609
590 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt); 610 fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
591 if (!fsn_mark) { 611 if (!fsn_mark) {
592 int ret;
593
594 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) 612 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
595 return -ENOSPC; 613 return -ENOSPC;
596 614
@@ -600,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
600 618
601 fsnotify_init_mark(fsn_mark, fanotify_free_mark); 619 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
602 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0); 620 ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
603 if (ret) { 621 if (ret)
604 fanotify_free_mark(fsn_mark); 622 goto err;
605 return ret;
606 }
607 } 623 }
608 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 624 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
609 fsnotify_put_mark(fsn_mark); 625
610 if (added & ~mnt->mnt_fsnotify_mask) 626 if (added & ~mnt->mnt_fsnotify_mask)
611 fsnotify_recalc_vfsmount_mask(mnt); 627 fsnotify_recalc_vfsmount_mask(mnt);
612 628err:
613 return 0; 629 fsnotify_put_mark(fsn_mark);
630 return ret;
614} 631}
615 632
616static int fanotify_add_inode_mark(struct fsnotify_group *group, 633static int fanotify_add_inode_mark(struct fsnotify_group *group,
@@ -619,6 +636,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
619{ 636{
620 struct fsnotify_mark *fsn_mark; 637 struct fsnotify_mark *fsn_mark;
621 __u32 added; 638 __u32 added;
639 int ret = 0;
622 640
623 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode); 641 pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
624 642
@@ -634,8 +652,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
634 652
635 fsn_mark = fsnotify_find_inode_mark(group, inode); 653 fsn_mark = fsnotify_find_inode_mark(group, inode);
636 if (!fsn_mark) { 654 if (!fsn_mark) {
637 int ret;
638
639 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks) 655 if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
640 return -ENOSPC; 656 return -ENOSPC;
641 657
@@ -645,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
645 661
646 fsnotify_init_mark(fsn_mark, fanotify_free_mark); 662 fsnotify_init_mark(fsn_mark, fanotify_free_mark);
647 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0); 663 ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
648 if (ret) { 664 if (ret)
649 fanotify_free_mark(fsn_mark); 665 goto err;
650 return ret;
651 }
652 } 666 }
653 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 667 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
654 fsnotify_put_mark(fsn_mark); 668
655 if (added & ~inode->i_fsnotify_mask) 669 if (added & ~inode->i_fsnotify_mask)
656 fsnotify_recalc_inode_mask(inode); 670 fsnotify_recalc_inode_mask(inode);
657 return 0; 671err:
672 fsnotify_put_mark(fsn_mark);
673 return ret;
658} 674}
659 675
660/* fanotify syscalls */ 676/* fanotify syscalls */
@@ -687,8 +703,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
687 703
688 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */ 704 /* fsnotify_alloc_group takes a ref. Dropped in fanotify_release */
689 group = fsnotify_alloc_group(&fanotify_fsnotify_ops); 705 group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
690 if (IS_ERR(group)) 706 if (IS_ERR(group)) {
707 free_uid(user);
691 return PTR_ERR(group); 708 return PTR_ERR(group);
709 }
692 710
693 group->fanotify_data.user = user; 711 group->fanotify_data.user = user;
694 atomic_inc(&user->fanotify_listeners); 712 atomic_inc(&user->fanotify_listeners);
@@ -698,6 +716,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
698 mutex_init(&group->fanotify_data.access_mutex); 716 mutex_init(&group->fanotify_data.access_mutex);
699 init_waitqueue_head(&group->fanotify_data.access_waitq); 717 init_waitqueue_head(&group->fanotify_data.access_waitq);
700 INIT_LIST_HEAD(&group->fanotify_data.access_list); 718 INIT_LIST_HEAD(&group->fanotify_data.access_list);
719 atomic_set(&group->fanotify_data.bypass_perm, 0);
701#endif 720#endif
702 switch (flags & FAN_ALL_CLASS_BITS) { 721 switch (flags & FAN_ALL_CLASS_BITS) {
703 case FAN_CLASS_NOTIF: 722 case FAN_CLASS_NOTIF:
@@ -764,8 +783,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
764 if (flags & ~FAN_ALL_MARK_FLAGS) 783 if (flags & ~FAN_ALL_MARK_FLAGS)
765 return -EINVAL; 784 return -EINVAL;
766 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) { 785 switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
767 case FAN_MARK_ADD: 786 case FAN_MARK_ADD: /* fallthrough */
768 case FAN_MARK_REMOVE: 787 case FAN_MARK_REMOVE:
788 if (!mask)
789 return -EINVAL;
769 case FAN_MARK_FLUSH: 790 case FAN_MARK_FLUSH:
770 break; 791 break;
771 default: 792 default:
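The fanotify_mark() hunk above rejects an empty mask for ADD and REMOVE while still accepting FLUSH, and it does so with deliberate case fallthrough: ADD falls into REMOVE's check, and a valid ADD or REMOVE then falls into FLUSH's break. A compact standalone illustration of that control flow (argument validation only; the constants are invented and this is not the fanotify ABI):

#include <stdio.h>

enum { MARK_ADD = 1, MARK_REMOVE = 2, MARK_FLUSH = 4 };

static int validate(unsigned int op, unsigned long long mask)
{
        switch (op) {
        case MARK_ADD:          /* fallthrough */
        case MARK_REMOVE:
                if (!mask)
                        return -1;      /* ADD/REMOVE need a mask */
                /* fallthrough: a valid ADD/REMOVE continues like FLUSH */
        case MARK_FLUSH:
                break;                  /* FLUSH ignores the mask */
        default:
                return -1;              /* unknown operation */
        }
        return 0;
}

int main(void)
{
        printf("%d %d %d\n",
               validate(MARK_ADD, 0),      /* -1: no mask given */
               validate(MARK_FLUSH, 0),    /*  0: mask not required */
               validate(MARK_REMOVE, 1));  /*  0: valid request */
        return 0;
}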
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 20dc218707ca..79b47cbb5cd8 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -59,7 +59,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
59 /* determine if the children should tell inode about their events */ 59 /* determine if the children should tell inode about their events */
60 watched = fsnotify_inode_watches_children(inode); 60 watched = fsnotify_inode_watches_children(inode);
61 61
62 spin_lock(&dcache_lock); 62 spin_lock(&inode->i_lock);
63 /* run all of the dentries associated with this inode. Since this is a 63 /* run all of the dentries associated with this inode. Since this is a
64 * directory, there damn well better only be one item on this list */ 64 * directory, there damn well better only be one item on this list */
65 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 65 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
@@ -68,19 +68,21 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
68 /* run all of the children of the original inode and fix their 68 /* run all of the children of the original inode and fix their
69 * d_flags to indicate parental interest (their parent is the 69 * d_flags to indicate parental interest (their parent is the
70 * original inode) */ 70 * original inode) */
71 spin_lock(&alias->d_lock);
71 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { 72 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
72 if (!child->d_inode) 73 if (!child->d_inode)
73 continue; 74 continue;
74 75
75 spin_lock(&child->d_lock); 76 spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
76 if (watched) 77 if (watched)
77 child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; 78 child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
78 else 79 else
79 child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; 80 child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
80 spin_unlock(&child->d_lock); 81 spin_unlock(&child->d_lock);
81 } 82 }
83 spin_unlock(&alias->d_lock);
82 } 84 }
83 spin_unlock(&dcache_lock); 85 spin_unlock(&inode->i_lock);
84} 86}
85 87
86/* Notify this dentry's parent about a child's events. */ 88/* Notify this dentry's parent about a child's events. */
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 444c305a468c..4cd5d5d78f9f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
752 if (ret >= 0) 752 if (ret >= 0)
753 return ret; 753 return ret;
754 754
755 fsnotify_put_group(group);
755 atomic_dec(&user->inotify_devs); 756 atomic_dec(&user->inotify_devs);
756out_free_uid: 757out_free_uid:
757 free_uid(user); 758 free_uid(user);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 93622b175fc7..a627ed82c0a3 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -332,6 +332,13 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
332 return NULL; 332 return NULL;
333} 333}
334 334
335static void ntfs_i_callback(struct rcu_head *head)
336{
337 struct inode *inode = container_of(head, struct inode, i_rcu);
338 INIT_LIST_HEAD(&inode->i_dentry);
339 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
340}
341
335void ntfs_destroy_big_inode(struct inode *inode) 342void ntfs_destroy_big_inode(struct inode *inode)
336{ 343{
337 ntfs_inode *ni = NTFS_I(inode); 344 ntfs_inode *ni = NTFS_I(inode);
@@ -340,7 +347,7 @@ void ntfs_destroy_big_inode(struct inode *inode)
340 BUG_ON(ni->page); 347 BUG_ON(ni->page);
341 if (!atomic_dec_and_test(&ni->count)) 348 if (!atomic_dec_and_test(&ni->count))
342 BUG(); 349 BUG();
343 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); 350 call_rcu(&inode->i_rcu, ntfs_i_callback);
344} 351}
345 352
346static inline ntfs_inode *ntfs_alloc_extent_inode(void) 353static inline ntfs_inode *ntfs_alloc_extent_inode(void)
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 391915093fe1..704f6b1742f3 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -291,13 +291,17 @@ static int ocfs2_set_acl(handle_t *handle,
291 return ret; 291 return ret;
292} 292}
293 293
294int ocfs2_check_acl(struct inode *inode, int mask) 294int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
295{ 295{
296 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 296 struct ocfs2_super *osb;
297 struct buffer_head *di_bh = NULL; 297 struct buffer_head *di_bh = NULL;
298 struct posix_acl *acl; 298 struct posix_acl *acl;
299 int ret = -EAGAIN; 299 int ret = -EAGAIN;
300 300
301 if (flags & IPERM_FLAG_RCU)
302 return -ECHILD;
303
304 osb = OCFS2_SB(inode->i_sb);
301 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 305 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
302 return ret; 306 return ret;
303 307
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 5c5d31f05853..4fe7c9cf4bfb 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29extern int ocfs2_check_acl(struct inode *, int); 29extern int ocfs2_check_acl(struct inode *, int, unsigned int);
30extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
32 struct buffer_head *, struct buffer_head *, 32 struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f1e962cb3b73..0d7c5540ad66 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -573,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
573 /* this io's submitter should not have unlocked this before we could */ 573 /* this io's submitter should not have unlocked this before we could */
574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 574 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
575 575
576 if (ocfs2_iocb_is_sem_locked(iocb)) {
577 up_read(&inode->i_alloc_sem);
578 ocfs2_iocb_clear_sem_locked(iocb);
579 }
580
576 ocfs2_iocb_clear_rw_locked(iocb); 581 ocfs2_iocb_clear_rw_locked(iocb);
577 582
578 level = ocfs2_iocb_rw_locked_level(iocb); 583 level = ocfs2_iocb_rw_locked_level(iocb);
579 if (!level)
580 up_read(&inode->i_alloc_sem);
581 ocfs2_rw_unlock(inode, level); 584 ocfs2_rw_unlock(inode, level);
582 585
583 if (is_async) 586 if (is_async)
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 76bfdfda691a..eceb456037c1 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -68,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
68 else 68 else
69 clear_bit(1, (unsigned long *)&iocb->private); 69 clear_bit(1, (unsigned long *)&iocb->private);
70} 70}
71
72/*
73 * Using a named enum representing lock types in terms of #N bit stored in
74 * iocb->private, which is going to be used for communication between
75 * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
76 */
77enum ocfs2_iocb_lock_bits {
78 OCFS2_IOCB_RW_LOCK = 0,
79 OCFS2_IOCB_RW_LOCK_LEVEL,
80 OCFS2_IOCB_SEM,
81 OCFS2_IOCB_NUM_LOCKS
82};
83
71#define ocfs2_iocb_clear_rw_locked(iocb) \ 84#define ocfs2_iocb_clear_rw_locked(iocb) \
72 clear_bit(0, (unsigned long *)&iocb->private) 85 clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
73#define ocfs2_iocb_rw_locked_level(iocb) \ 86#define ocfs2_iocb_rw_locked_level(iocb) \
74 test_bit(1, (unsigned long *)&iocb->private) 87 test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
88#define ocfs2_iocb_set_sem_locked(iocb) \
89 set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
90#define ocfs2_iocb_clear_sem_locked(iocb) \
91 clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
92#define ocfs2_iocb_is_sem_locked(iocb) \
93 test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
75#endif /* OCFS2_FILE_H */ 94#endif /* OCFS2_FILE_H */
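The aops.h hunk above names the bits that ocfs2 packs into iocb->private with an enum instead of bare 0/1 constants, and adds a third bit tracking i_alloc_sem state. The underlying idea is independent flag bits stored in one unsigned long; a small non-atomic userspace sketch of the same bookkeeping follows (the kernel uses the atomic set_bit/test_bit/clear_bit helpers instead of these plain functions):

#include <stdio.h>

enum iocb_lock_bits {           /* mirrors the enum added in the hunk */
        IOCB_RW_LOCK = 0,
        IOCB_RW_LOCK_LEVEL,
        IOCB_SEM,
};

static void flag_set(unsigned long *word, int bit)   { *word |=  (1UL << bit); }
static void flag_clear(unsigned long *word, int bit) { *word &= ~(1UL << bit); }
static int  flag_test(unsigned long *word, int bit)  { return !!(*word & (1UL << bit)); }

int main(void)
{
        unsigned long private = 0;   /* plays the role of iocb->private */

        flag_set(&private, IOCB_RW_LOCK);
        flag_set(&private, IOCB_SEM);
        flag_clear(&private, IOCB_RW_LOCK);

        printf("rw=%d level=%d sem=%d\n",
               flag_test(&private, IOCB_RW_LOCK),
               flag_test(&private, IOCB_RW_LOCK_LEVEL),
               flag_test(&private, IOCB_SEM));
        return 0;
}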
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 52c7557f3e25..9e3d45bcb5fd 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -307,8 +307,7 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg)
307 307
308static void o2hb_disarm_write_timeout(struct o2hb_region *reg) 308static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
309{ 309{
310 cancel_delayed_work(&reg->hr_write_timeout_work); 310 cancel_delayed_work_sync(&reg->hr_write_timeout_work);
311 flush_scheduled_work();
312} 311}
313 312
314static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc) 313static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
@@ -1964,8 +1963,10 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
1964 if (reg == NULL) 1963 if (reg == NULL)
1965 return ERR_PTR(-ENOMEM); 1964 return ERR_PTR(-ENOMEM);
1966 1965
1967 if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) 1966 if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
1968 return ERR_PTR(-ENAMETOOLONG); 1967 ret = -ENAMETOOLONG;
1968 goto free;
1969 }
1969 1970
1970 spin_lock(&o2hb_live_lock); 1971 spin_lock(&o2hb_live_lock);
1971 reg->hr_region_num = 0; 1972 reg->hr_region_num = 0;
@@ -1974,7 +1975,8 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
1974 O2NM_MAX_REGIONS); 1975 O2NM_MAX_REGIONS);
1975 if (reg->hr_region_num >= O2NM_MAX_REGIONS) { 1976 if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
1976 spin_unlock(&o2hb_live_lock); 1977 spin_unlock(&o2hb_live_lock);
1977 return ERR_PTR(-EFBIG); 1978 ret = -EFBIG;
1979 goto free;
1978 } 1980 }
1979 set_bit(reg->hr_region_num, o2hb_region_bitmap); 1981 set_bit(reg->hr_region_num, o2hb_region_bitmap);
1980 } 1982 }
@@ -1986,10 +1988,13 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
1986 ret = o2hb_debug_region_init(reg, o2hb_debug_dir); 1988 ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
1987 if (ret) { 1989 if (ret) {
1988 config_item_put(&reg->hr_item); 1990 config_item_put(&reg->hr_item);
1989 return ERR_PTR(ret); 1991 goto free;
1990 } 1992 }
1991 1993
1992 return &reg->hr_item; 1994 return &reg->hr_item;
1995free:
1996 kfree(reg);
1997 return ERR_PTR(ret);
1993} 1998}
1994 1999
1995static void o2hb_heartbeat_group_drop_item(struct config_group *group, 2000static void o2hb_heartbeat_group_drop_item(struct config_group *group,
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index c7fba396392d..6c61771469af 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
113 define_mask(QUOTA), 113 define_mask(QUOTA),
114 define_mask(REFCOUNT), 114 define_mask(REFCOUNT),
115 define_mask(BASTS), 115 define_mask(BASTS),
116 define_mask(RESERVATIONS),
117 define_mask(CLUSTER),
116 define_mask(ERROR), 118 define_mask(ERROR),
117 define_mask(NOTICE), 119 define_mask(NOTICE),
118 define_mask(KTHREAD), 120 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
120}; 121};
121 122
122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 123static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f56c94..34d6544357d9 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -81,7 +81,7 @@
81#include <linux/sched.h> 81#include <linux/sched.h>
82 82
83/* bits that are frequently given and infrequently matched in the low word */ 83/* bits that are frequently given and infrequently matched in the low word */
84/* NOTE: If you add a flag, you need to also update mlog.c! */ 84/* NOTE: If you add a flag, you need to also update masklog.c! */
85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */ 85#define ML_ENTRY 0x0000000000000001ULL /* func call entry */
86#define ML_EXIT 0x0000000000000002ULL /* func call exit */ 86#define ML_EXIT 0x0000000000000002ULL /* func call exit */
87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */ 87#define ML_TCP 0x0000000000000004ULL /* net cluster/tcp.c */
@@ -114,13 +114,14 @@
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ 115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */ 116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
117#define ML_BASTS 0x0000001000000000ULL /* dlmglue asts and basts */ 117#define ML_BASTS 0x0000000100000000ULL /* dlmglue asts and basts */
118#define ML_RESERVATIONS 0x0000000200000000ULL /* ocfs2 alloc reservations */
119#define ML_CLUSTER 0x0000000400000000ULL /* cluster stack */
120
118/* bits that are infrequently given and frequently matched in the high word */ 121/* bits that are infrequently given and frequently matched in the high word */
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 122#define ML_ERROR 0x1000000000000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */ 123#define ML_NOTICE 0x2000000000000000ULL /* sent to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 124#define ML_KTHREAD 0x4000000000000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
123#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */
124 125
125#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 126#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
126#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 127#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index cf3e16696216..a87366750f23 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -325,5 +325,7 @@ void o2quo_init(void)
325 325
326void o2quo_exit(void) 326void o2quo_exit(void)
327{ 327{
328 flush_scheduled_work(); 328 struct o2quo_state *qs = &o2quo_state;
329
330 flush_work_sync(&qs->qs_work);
329} 331}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index edaded48e7e9..6d80ecc7834f 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -52,9 +52,15 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
52static int ocfs2_dentry_revalidate(struct dentry *dentry, 52static int ocfs2_dentry_revalidate(struct dentry *dentry,
53 struct nameidata *nd) 53 struct nameidata *nd)
54{ 54{
55 struct inode *inode = dentry->d_inode; 55 struct inode *inode;
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); 57 struct ocfs2_super *osb;
58
59 if (nd->flags & LOOKUP_RCU)
60 return -ECHILD;
61
62 inode = dentry->d_inode;
63 osb = OCFS2_SB(dentry->d_sb);
58 64
59 mlog_entry("(0x%p, '%.*s')\n", dentry, 65 mlog_entry("(0x%p, '%.*s')\n", dentry,
60 dentry->d_name.len, dentry->d_name.name); 66 dentry->d_name.len, dentry->d_name.name);
@@ -169,23 +175,25 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
169 struct list_head *p; 175 struct list_head *p;
170 struct dentry *dentry = NULL; 176 struct dentry *dentry = NULL;
171 177
172 spin_lock(&dcache_lock); 178 spin_lock(&inode->i_lock);
173
174 list_for_each(p, &inode->i_dentry) { 179 list_for_each(p, &inode->i_dentry) {
175 dentry = list_entry(p, struct dentry, d_alias); 180 dentry = list_entry(p, struct dentry, d_alias);
176 181
182 spin_lock(&dentry->d_lock);
177 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 183 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
178 mlog(0, "dentry found: %.*s\n", 184 mlog(0, "dentry found: %.*s\n",
179 dentry->d_name.len, dentry->d_name.name); 185 dentry->d_name.len, dentry->d_name.name);
180 186
181 dget_locked(dentry); 187 dget_dlock(dentry);
188 spin_unlock(&dentry->d_lock);
182 break; 189 break;
183 } 190 }
191 spin_unlock(&dentry->d_lock);
184 192
185 dentry = NULL; 193 dentry = NULL;
186 } 194 }
187 195
188 spin_unlock(&dcache_lock); 196 spin_unlock(&inode->i_lock);
189 197
190 return dentry; 198 return dentry;
191} 199}
@@ -476,7 +484,6 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
476 484
477out: 485out:
478 iput(inode); 486 iput(inode);
479 ocfs2_dentry_attach_gen(dentry);
480} 487}
481 488
482/* 489/*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c49f6de0e7ab..d417b3f9b0c7 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2461 2461
2462 di->i_dx_root = cpu_to_le64(dr_blkno); 2462 di->i_dx_root = cpu_to_le64(dr_blkno);
2463 2463
2464 spin_lock(&OCFS2_I(dir)->ip_lock);
2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2465 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2466 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2467 spin_unlock(&OCFS2_I(dir)->ip_lock);
2466 2468
2467 ocfs2_journal_dirty(handle, di_bh); 2469 ocfs2_journal_dirty(handle, di_bh);
2468 2470
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4466 goto out_commit; 4468 goto out_commit;
4467 } 4469 }
4468 4470
4471 spin_lock(&OCFS2_I(dir)->ip_lock);
4469 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL; 4472 OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
4470 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 4473 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
4474 spin_unlock(&OCFS2_I(dir)->ip_lock);
4471 di->i_dx_root = cpu_to_le64(0ULL); 4475 di->i_dx_root = cpu_to_le64(0ULL);
4472 4476
4473 ocfs2_journal_dirty(handle, di_bh); 4477 ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 58a93b953735..cc2aaa96cfe5 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -959,7 +959,7 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
959 r += O2HB_MAX_REGION_NAME_LEN; 959 r += O2HB_MAX_REGION_NAME_LEN;
960 } 960 }
961 961
962 local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); 962 local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC);
963 if (!local) { 963 if (!local) {
964 status = -ENOMEM; 964 status = -ENOMEM;
965 goto bail; 965 goto bail;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f564b0e5f80d..59f0f6bdfc62 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
2346 */ 2346 */
2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, 2347static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2348 struct dlm_lock_resource *res, 2348 struct dlm_lock_resource *res,
2349 int *numlocks) 2349 int *numlocks,
2350 int *hasrefs)
2350{ 2351{
2351 int ret; 2352 int ret;
2352 int i; 2353 int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2356 2357
2357 assert_spin_locked(&res->spinlock); 2358 assert_spin_locked(&res->spinlock);
2358 2359
2360 *numlocks = 0;
2361 *hasrefs = 0;
2362
2359 ret = -EINVAL; 2363 ret = -EINVAL;
2360 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { 2364 if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
2361 mlog(0, "cannot migrate lockres with unknown owner!\n"); 2365 mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2386 } 2390 }
2387 2391
2388 *numlocks = count; 2392 *numlocks = count;
2389 mlog(0, "migrateable lockres having %d locks\n", *numlocks); 2393
2394 count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2395 if (count < O2NM_MAX_NODES)
2396 *hasrefs = 1;
2397
2398 mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
2399 res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
2390 2400
2391leave: 2401leave:
2392 return ret; 2402 return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2408 const char *name; 2418 const char *name;
2409 unsigned int namelen; 2419 unsigned int namelen;
2410 int mle_added = 0; 2420 int mle_added = 0;
2411 int numlocks; 2421 int numlocks, hasrefs;
2412 int wake = 0; 2422 int wake = 0;
2413 2423
2414 if (!dlm_grab(dlm)) 2424 if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2417 name = res->lockname.name; 2427 name = res->lockname.name;
2418 namelen = res->lockname.len; 2428 namelen = res->lockname.len;
2419 2429
2420 mlog(0, "migrating %.*s to %u\n", namelen, name, target); 2430 mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
2421 2431
2422 /* 2432 /*
2423 * ensure this lockres is a proper candidate for migration 2433 * ensure this lockres is a proper candidate for migration
2424 */ 2434 */
2425 spin_lock(&res->spinlock); 2435 spin_lock(&res->spinlock);
2426 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2436 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2427 if (ret < 0) { 2437 if (ret < 0) {
2428 spin_unlock(&res->spinlock); 2438 spin_unlock(&res->spinlock);
2429 goto leave; 2439 goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2431 spin_unlock(&res->spinlock); 2441 spin_unlock(&res->spinlock);
2432 2442
2433 /* no work to do */ 2443 /* no work to do */
2434 if (numlocks == 0) { 2444 if (numlocks == 0 && !hasrefs)
2435 mlog(0, "no locks were found on this lockres! done!\n");
2436 goto leave; 2445 goto leave;
2437 }
2438 2446
2439 /* 2447 /*
2440 * preallocate up front 2448 * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2459 * find a node to migrate the lockres to 2467 * find a node to migrate the lockres to
2460 */ 2468 */
2461 2469
2462 mlog(0, "picking a migration node\n");
2463 spin_lock(&dlm->spinlock); 2470 spin_lock(&dlm->spinlock);
2464 /* pick a new node */ 2471 /* pick a new node */
2465 if (!test_bit(target, dlm->domain_map) || 2472 if (!test_bit(target, dlm->domain_map) ||
2466 target >= O2NM_MAX_NODES) { 2473 target >= O2NM_MAX_NODES) {
2467 target = dlm_pick_migration_target(dlm, res); 2474 target = dlm_pick_migration_target(dlm, res);
2468 } 2475 }
2469 mlog(0, "node %u chosen for migration\n", target); 2476 mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
2477 namelen, name, target);
2470 2478
2471 if (target >= O2NM_MAX_NODES || 2479 if (target >= O2NM_MAX_NODES ||
2472 !test_bit(target, dlm->domain_map)) { 2480 !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2667{ 2675{
2668 int ret; 2676 int ret;
2669 int lock_dropped = 0; 2677 int lock_dropped = 0;
2670 int numlocks; 2678 int numlocks, hasrefs;
2671 2679
2672 spin_lock(&res->spinlock); 2680 spin_lock(&res->spinlock);
2673 if (res->owner != dlm->node_num) { 2681 if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2681 } 2689 }
2682 2690
2683 /* No need to migrate a lockres having no locks */ 2691 /* No need to migrate a lockres having no locks */
2684 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks); 2692 ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
2685 if (ret >= 0 && numlocks == 0) { 2693 if (ret >= 0 && numlocks == 0 && !hasrefs) {
2686 spin_unlock(&res->spinlock); 2694 spin_unlock(&res->spinlock);
2687 goto leave; 2695 goto leave;
2688 } 2696 }
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2915 } 2923 }
2916 queue++; 2924 queue++;
2917 } 2925 }
2926
2927 nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
2928 if (nodenum < O2NM_MAX_NODES) {
2929 spin_unlock(&res->spinlock);
2930 return nodenum;
2931 }
2918 spin_unlock(&res->spinlock); 2932 spin_unlock(&res->spinlock);
2919 mlog(0, "have not found a suitable target yet! checking domain map\n"); 2933 mlog(0, "have not found a suitable target yet! checking domain map\n");
2920 2934
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b2df490a19ed..8c5c0eddc365 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -351,11 +351,18 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
351 return &ip->ip_vfs_inode; 351 return &ip->ip_vfs_inode;
352} 352}
353 353
354static void dlmfs_destroy_inode(struct inode *inode) 354static void dlmfs_i_callback(struct rcu_head *head)
355{ 355{
356 struct inode *inode = container_of(head, struct inode, i_rcu);
357 INIT_LIST_HEAD(&inode->i_dentry);
356 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 358 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
357} 359}
358 360
361static void dlmfs_destroy_inode(struct inode *inode)
362{
363 call_rcu(&inode->i_rcu, dlmfs_i_callback);
364}
365
359static void dlmfs_evict_inode(struct inode *inode) 366static void dlmfs_evict_inode(struct inode *inode)
360{ 367{
361 int status; 368 int status;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 19ad145d2af3..6adafa576065 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -138,7 +138,7 @@ check_gen:
138 138
139 result = d_obtain_alias(inode); 139 result = d_obtain_alias(inode);
140 if (!IS_ERR(result)) 140 if (!IS_ERR(result))
141 result->d_op = &ocfs2_dentry_ops; 141 d_set_d_op(result, &ocfs2_dentry_ops);
142 else 142 else
143 mlog_errno(PTR_ERR(result)); 143 mlog_errno(PTR_ERR(result));
144 144
@@ -176,7 +176,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
176 176
177 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); 177 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
178 if (!IS_ERR(parent)) 178 if (!IS_ERR(parent))
179 parent->d_op = &ocfs2_dentry_ops; 179 d_set_d_op(parent, &ocfs2_dentry_ops);
180 180
181bail_unlock: 181bail_unlock:
182 ocfs2_inode_unlock(dir, 0); 182 ocfs2_inode_unlock(dir, 0);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 77b4c04a2809..bdadbae09094 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1307,10 +1307,13 @@ bail:
1307 return err; 1307 return err;
1308} 1308}
1309 1309
1310int ocfs2_permission(struct inode *inode, int mask) 1310int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
1311{ 1311{
1312 int ret; 1312 int ret;
1313 1313
1314 if (flags & IPERM_FLAG_RCU)
1315 return -ECHILD;
1316
1314 mlog_entry_void(); 1317 mlog_entry_void();
1315 1318
1316 ret = ocfs2_inode_lock(inode, NULL, 0); 1319 ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1320,7 +1323,7 @@ int ocfs2_permission(struct inode *inode, int mask)
1320 goto out; 1323 goto out;
1321 } 1324 }
1322 1325
1323 ret = generic_permission(inode, mask, ocfs2_check_acl); 1326 ret = generic_permission(inode, mask, flags, ocfs2_check_acl);
1324 1327
1325 ocfs2_inode_unlock(inode, 0); 1328 ocfs2_inode_unlock(inode, 0);
1326out: 1329out:
@@ -2241,11 +2244,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2241 2244
2242 mutex_lock(&inode->i_mutex); 2245 mutex_lock(&inode->i_mutex);
2243 2246
2247 ocfs2_iocb_clear_sem_locked(iocb);
2248
2244relock: 2249relock:
2245 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2250 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
2246 if (direct_io) { 2251 if (direct_io) {
2247 down_read(&inode->i_alloc_sem); 2252 down_read(&inode->i_alloc_sem);
2248 have_alloc_sem = 1; 2253 have_alloc_sem = 1;
2254 /* communicate with ocfs2_dio_end_io */
2255 ocfs2_iocb_set_sem_locked(iocb);
2249 } 2256 }
2250 2257
2251 /* 2258 /*
@@ -2382,8 +2389,10 @@ out:
2382 ocfs2_rw_unlock(inode, rw_level); 2389 ocfs2_rw_unlock(inode, rw_level);
2383 2390
2384out_sems: 2391out_sems:
2385 if (have_alloc_sem) 2392 if (have_alloc_sem) {
2386 up_read(&inode->i_alloc_sem); 2393 up_read(&inode->i_alloc_sem);
2394 ocfs2_iocb_clear_sem_locked(iocb);
2395 }
2387 2396
2388 mutex_unlock(&inode->i_mutex); 2397 mutex_unlock(&inode->i_mutex);
2389 2398
@@ -2527,6 +2536,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2527 goto bail; 2536 goto bail;
2528 } 2537 }
2529 2538
2539 ocfs2_iocb_clear_sem_locked(iocb);
2540
2530 /* 2541 /*
2531 * buffered reads protect themselves in ->readpage(). O_DIRECT reads 2542 * buffered reads protect themselves in ->readpage(). O_DIRECT reads
2532 * need locks to protect pending reads from racing with truncate. 2543 * need locks to protect pending reads from racing with truncate.
@@ -2534,6 +2545,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2534 if (filp->f_flags & O_DIRECT) { 2545 if (filp->f_flags & O_DIRECT) {
2535 down_read(&inode->i_alloc_sem); 2546 down_read(&inode->i_alloc_sem);
2536 have_alloc_sem = 1; 2547 have_alloc_sem = 1;
2548 ocfs2_iocb_set_sem_locked(iocb);
2537 2549
2538 ret = ocfs2_rw_lock(inode, 0); 2550 ret = ocfs2_rw_lock(inode, 0);
2539 if (ret < 0) { 2551 if (ret < 0) {
@@ -2575,8 +2587,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2575 } 2587 }
2576 2588
2577bail: 2589bail:
2578 if (have_alloc_sem) 2590 if (have_alloc_sem) {
2579 up_read(&inode->i_alloc_sem); 2591 up_read(&inode->i_alloc_sem);
2592 ocfs2_iocb_clear_sem_locked(iocb);
2593 }
2580 if (rw_level != -1) 2594 if (rw_level != -1)
2581 ocfs2_rw_unlock(inode, rw_level); 2595 ocfs2_rw_unlock(inode, rw_level);
2582 mlog_exit(ret); 2596 mlog_exit(ret);
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 97bf761c9e7c..f5afbbef6703 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
63 struct kstat *stat); 63 struct kstat *stat);
64int ocfs2_permission(struct inode *inode, int mask); 64int ocfs2_permission(struct inode *inode, int mask, unsigned int flags);
65 65
66int ocfs2_should_update_atime(struct inode *inode, 66int ocfs2_should_update_atime(struct inode *inode,
67 struct vfsmount *vfsmnt); 67 struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ff5744e1e36f..d14cad6e2e41 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
147 spin_unlock(&oi->ip_lock); 147 spin_unlock(&oi->ip_lock);
148 148
149bail_add: 149bail_add:
150 dentry->d_op = &ocfs2_dentry_ops; 150 d_set_d_op(dentry, &ocfs2_dentry_ops);
151 ret = d_splice_alias(inode, dentry); 151 ret = d_splice_alias(inode, dentry);
152 152
153 if (inode) { 153 if (inode) {
@@ -415,7 +415,7 @@ static int ocfs2_mknod(struct inode *dir,
415 mlog_errno(status); 415 mlog_errno(status);
416 goto leave; 416 goto leave;
417 } 417 }
418 dentry->d_op = &ocfs2_dentry_ops; 418 d_set_d_op(dentry, &ocfs2_dentry_ops);
419 419
420 status = ocfs2_add_entry(handle, dentry, inode, 420 status = ocfs2_add_entry(handle, dentry, inode,
421 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 421 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
@@ -743,7 +743,7 @@ static int ocfs2_link(struct dentry *old_dentry,
743 } 743 }
744 744
745 ihold(inode); 745 ihold(inode);
746 dentry->d_op = &ocfs2_dentry_ops; 746 d_set_d_op(dentry, &ocfs2_dentry_ops);
747 d_instantiate(dentry, inode); 747 d_instantiate(dentry, inode);
748 748
749out_commit: 749out_commit:
@@ -1794,7 +1794,7 @@ static int ocfs2_symlink(struct inode *dir,
1794 mlog_errno(status); 1794 mlog_errno(status);
1795 goto bail; 1795 goto bail;
1796 } 1796 }
1797 dentry->d_op = &ocfs2_dentry_ops; 1797 d_set_d_op(dentry, &ocfs2_dentry_ops);
1798 1798
1799 status = ocfs2_add_entry(handle, dentry, inode, 1799 status = ocfs2_add_entry(handle, dentry, inode,
1800 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1800 le64_to_cpu(fe->i_blkno), parent_fe_bh,
@@ -2459,7 +2459,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2459 goto out_commit; 2459 goto out_commit;
2460 } 2460 }
2461 2461
2462 dentry->d_op = &ocfs2_dentry_ops; 2462 d_set_d_op(dentry, &ocfs2_dentry_ops);
2463 d_instantiate(dentry, inode); 2463 d_instantiate(dentry, inode);
2464 status = 0; 2464 status = 0;
2465out_commit: 2465out_commit:
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d8408217e3bd..70dd3b1798f1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -159,7 +159,9 @@ struct ocfs2_lock_res {
159 char l_name[OCFS2_LOCK_ID_MAX_LEN]; 159 char l_name[OCFS2_LOCK_ID_MAX_LEN];
160 unsigned int l_ro_holders; 160 unsigned int l_ro_holders;
161 unsigned int l_ex_holders; 161 unsigned int l_ex_holders;
162 unsigned char l_level; 162 signed char l_level;
163 signed char l_requested;
164 signed char l_blocking;
163 165
164 /* Data packed - type enum ocfs2_lock_type */ 166 /* Data packed - type enum ocfs2_lock_type */
165 unsigned char l_type; 167 unsigned char l_type;
@@ -169,8 +171,6 @@ struct ocfs2_lock_res {
169 unsigned char l_action; 171 unsigned char l_action;
170 /* Data packed - enum type ocfs2_unlock_action */ 172 /* Data packed - enum type ocfs2_unlock_action */
171 unsigned char l_unlock_action; 173 unsigned char l_unlock_action;
172 unsigned char l_requested;
173 unsigned char l_blocking;
174 unsigned int l_pending_gen; 174 unsigned int l_pending_gen;
175 175
176 spinlock_t l_lock; 176 spinlock_t l_lock;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c2e4f8222e2f..bf2e7764920e 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -350,7 +350,7 @@ enum {
350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE 350#define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
351 NUM_SYSTEM_INODES 351 NUM_SYSTEM_INODES
352}; 352};
353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE 353#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
354#define NUM_LOCAL_SYSTEM_INODES \ 354#define NUM_LOCAL_SYSTEM_INODES \
355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) 355 (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
356 356
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 252e7c82f929..a5ebe421195f 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -190,7 +190,7 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
190 return c; 190 return c;
191 } 191 }
192 192
193 return c; 193 return NULL;
194} 194}
195 195
196/* 196/*
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f02c0ef31578..17ff46fa8a10 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,7 +41,6 @@
41#include <linux/mount.h> 41#include <linux/mount.h>
42#include <linux/seq_file.h> 42#include <linux/seq_file.h>
43#include <linux/quotaops.h> 43#include <linux/quotaops.h>
44#include <linux/smp_lock.h>
45 44
46#define MLOG_MASK_PREFIX ML_SUPER 45#define MLOG_MASK_PREFIX ML_SUPER
47#include <cluster/masklog.h> 46#include <cluster/masklog.h>
@@ -570,11 +569,18 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
570 return &oi->vfs_inode; 569 return &oi->vfs_inode;
571} 570}
572 571
573static void ocfs2_destroy_inode(struct inode *inode) 572static void ocfs2_i_callback(struct rcu_head *head)
574{ 573{
574 struct inode *inode = container_of(head, struct inode, i_rcu);
575 INIT_LIST_HEAD(&inode->i_dentry);
575 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 576 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
576} 577}
577 578
579static void ocfs2_destroy_inode(struct inode *inode)
580{
581 call_rcu(&inode->i_rcu, ocfs2_i_callback);
582}
583
578static unsigned long long ocfs2_max_file_offset(unsigned int bbits, 584static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
579 unsigned int cbits) 585 unsigned int cbits)
580{ 586{
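Editor's note: the destroy_inode conversions in this series (dlmfs and ocfs2 above; openpromfs, proc, qnx4, reiserfs and romfs below) all follow one pattern: instead of freeing the per-filesystem inode immediately, ->destroy_inode() queues an RCU callback, so a lockless (RCU-walk) path lookup that is still examining the inode cannot touch freed memory. The callback re-initialises i_dentry because i_rcu shares its storage with i_dentry in this kernel's struct inode and the slab constructor only runs when an object is first allocated. A generic kernel-style sketch of the shape; the "foo" names are placeholders, and this mirrors the hunks rather than adding anything new:

/* Kernel-style sketch of the RCU-deferred inode free used throughout
 * this series.  foo_inode_cachep and FOO_I() are placeholders. */
static void foo_i_callback(struct rcu_head *head)
{
	struct inode *inode = container_of(head, struct inode, i_rcu);

	/* i_rcu overlays i_dentry; restore the list head before the
	 * object is returned to the slab cache. */
	INIT_LIST_HEAD(&inode->i_dentry);
	kmem_cache_free(foo_inode_cachep, FOO_I(inode));
}

static void foo_destroy_inode(struct inode *inode)
{
	/* Defer the actual free until after an RCU grace period. */
	call_rcu(&inode->i_rcu, foo_i_callback);
}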
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ddb1f41376e5..a2a5bff774e3 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -343,11 +343,18 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
343 return &oi->vfs_inode; 343 return &oi->vfs_inode;
344} 344}
345 345
346static void openprom_destroy_inode(struct inode *inode) 346static void openprom_i_callback(struct rcu_head *head)
347{ 347{
348 struct inode *inode = container_of(head, struct inode, i_rcu);
349 INIT_LIST_HEAD(&inode->i_dentry);
348 kmem_cache_free(op_inode_cachep, OP_I(inode)); 350 kmem_cache_free(op_inode_cachep, OP_I(inode));
349} 351}
350 352
353static void openprom_destroy_inode(struct inode *inode)
354{
355 call_rcu(&inode->i_rcu, openprom_i_callback);
356}
357
351static struct inode *openprom_iget(struct super_block *sb, ino_t ino) 358static struct inode *openprom_iget(struct super_block *sb, ino_t ino)
352{ 359{
353 struct inode *inode; 360 struct inode *inode;
@@ -418,7 +425,7 @@ out_no_root:
418static struct dentry *openprom_mount(struct file_system_type *fs_type, 425static struct dentry *openprom_mount(struct file_system_type *fs_type,
419 int flags, const char *dev_name, void *data) 426 int flags, const char *dev_name, void *data)
420{ 427{
421 return mount_single(fs_type, flags, data, openprom_fill_super) 428 return mount_single(fs_type, flags, data, openprom_fill_super);
422} 429}
423 430
424static struct file_system_type openprom_fs_type = { 431static struct file_system_type openprom_fs_type = {
diff --git a/fs/pipe.c b/fs/pipe.c
index a8012a955720..68f1f8e4e23b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -999,12 +999,12 @@ struct file *create_write_pipe(int flags)
999 goto err; 999 goto err;
1000 1000
1001 err = -ENOMEM; 1001 err = -ENOMEM;
1002 path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); 1002 path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
1003 if (!path.dentry) 1003 if (!path.dentry)
1004 goto err_inode; 1004 goto err_inode;
1005 path.mnt = mntget(pipe_mnt); 1005 path.mnt = mntget(pipe_mnt);
1006 1006
1007 path.dentry->d_op = &pipefs_dentry_operations; 1007 d_set_d_op(path.dentry, &pipefs_dentry_operations);
1008 d_instantiate(path.dentry, inode); 1008 d_instantiate(path.dentry, inode);
1009 1009
1010 err = -ENFILE; 1010 err = -ENFILE;
@@ -1199,12 +1199,24 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1199 return ret; 1199 return ret;
1200} 1200}
1201 1201
1202/*
1203 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
1204 * location, so checking ->i_pipe is not enough to verify that this is a
1205 * pipe.
1206 */
1207struct pipe_inode_info *get_pipe_info(struct file *file)
1208{
1209 struct inode *i = file->f_path.dentry->d_inode;
1210
1211 return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
1212}
1213
1202long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) 1214long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1203{ 1215{
1204 struct pipe_inode_info *pipe; 1216 struct pipe_inode_info *pipe;
1205 long ret; 1217 long ret;
1206 1218
1207 pipe = file->f_path.dentry->d_inode->i_pipe; 1219 pipe = get_pipe_info(file);
1208 if (!pipe) 1220 if (!pipe)
1209 return -EBADF; 1221 return -EBADF;
1210 1222
@@ -1241,6 +1253,10 @@ out:
1241 return ret; 1253 return ret;
1242} 1254}
1243 1255
1256static const struct super_operations pipefs_ops = {
1257 .destroy_inode = free_inode_nonrcu,
1258};
1259
1244/* 1260/*
1245 * pipefs should _never_ be mounted by userland - too much of security hassle, 1261 * pipefs should _never_ be mounted by userland - too much of security hassle,
1246 * no real gain from having the whole whorehouse mounted. So we don't need 1262 * no real gain from having the whole whorehouse mounted. So we don't need
@@ -1250,7 +1266,7 @@ out:
1250static struct dentry *pipefs_mount(struct file_system_type *fs_type, 1266static struct dentry *pipefs_mount(struct file_system_type *fs_type,
1251 int flags, const char *dev_name, void *data) 1267 int flags, const char *dev_name, void *data)
1252{ 1268{
1253 return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); 1269 return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
1254} 1270}
1255 1271
1256static struct file_system_type pipe_fs_type = { 1272static struct file_system_type pipe_fs_type = {
@@ -1276,7 +1292,7 @@ static int __init init_pipe_fs(void)
1276static void __exit exit_pipe_fs(void) 1292static void __exit exit_pipe_fs(void)
1277{ 1293{
1278 unregister_filesystem(&pipe_fs_type); 1294 unregister_filesystem(&pipe_fs_type);
1279 mntput(pipe_mnt); 1295 mntput_long(pipe_mnt);
1280} 1296}
1281 1297
1282fs_initcall(init_pipe_fs); 1298fs_initcall(init_pipe_fs);
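Editor's note: get_pipe_info() above replaces the open-coded ->i_pipe dereferences. Because i_pipe, i_bdev and i_cdev share the same location in the inode, the helper first confirms the inode is a FIFO with S_ISFIFO() and only then trusts i_pipe. The same "check the mode before trusting the rest" idea can be shown from userspace with fstat(); the program below is purely illustrative and uses no kernel API:

/* Userspace illustration of the S_ISFIFO() guard that get_pipe_info()
 * applies in the kernel: classify an fd by its mode bits. */
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

static int fd_is_pipe(int fd)
{
	struct stat st;

	if (fstat(fd, &st) != 0)
		return 0;
	return S_ISFIFO(st.st_mode);
}

int main(void)
{
	int fds[2];

	if (pipe(fds) != 0) {
		perror("pipe");
		return 1;
	}
	printf("fds[0] is a pipe: %d\n", fd_is_pipe(fds[0]));
	printf("stdin  is a pipe: %d\n", fd_is_pipe(STDIN_FILENO));
	close(fds[0]);
	close(fds[1]);
	return 0;
}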
diff --git a/fs/pnode.c b/fs/pnode.c
index 8066b8dd748f..d42514e32380 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -288,7 +288,7 @@ out:
288 */ 288 */
289static inline int do_refcount_check(struct vfsmount *mnt, int count) 289static inline int do_refcount_check(struct vfsmount *mnt, int count)
290{ 290{
291 int mycount = atomic_read(&mnt->mnt_count) - mnt->mnt_ghosts; 291 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
292 return (mycount > count); 292 return (mycount > count);
293} 293}
294 294
@@ -300,7 +300,7 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
300 * Check if any of these mounts that **do not have submounts** 300 * Check if any of these mounts that **do not have submounts**
301 * have more references than 'refcnt'. If so return busy. 301 * have more references than 'refcnt'. If so return busy.
302 * 302 *
303 * vfsmount lock must be held for read or write 303 * vfsmount lock must be held for write
304 */ 304 */
305int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 305int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
306{ 306{
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 2758e2afc518..288a49e098bf 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -15,6 +15,7 @@ proc-y += devices.o
15proc-y += interrupts.o 15proc-y += interrupts.o
16proc-y += loadavg.o 16proc-y += loadavg.o
17proc-y += meminfo.o 17proc-y += meminfo.o
18proc-y += proc_console.o
18proc-y += stat.o 19proc-y += stat.o
19proc-y += uptime.o 20proc-y += uptime.o
20proc-y += version.o 21proc-y += version.o
diff --git a/fs/proc/base.c b/fs/proc/base.c
index f3d02ca461ec..b20962c71a52 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1407,6 +1407,82 @@ static const struct file_operations proc_pid_sched_operations = {
1407 1407
1408#endif 1408#endif
1409 1409
1410#ifdef CONFIG_SCHED_AUTOGROUP
1411/*
1412 * Print out autogroup related information:
1413 */
1414static int sched_autogroup_show(struct seq_file *m, void *v)
1415{
1416 struct inode *inode = m->private;
1417 struct task_struct *p;
1418
1419 p = get_proc_task(inode);
1420 if (!p)
1421 return -ESRCH;
1422 proc_sched_autogroup_show_task(p, m);
1423
1424 put_task_struct(p);
1425
1426 return 0;
1427}
1428
1429static ssize_t
1430sched_autogroup_write(struct file *file, const char __user *buf,
1431 size_t count, loff_t *offset)
1432{
1433 struct inode *inode = file->f_path.dentry->d_inode;
1434 struct task_struct *p;
1435 char buffer[PROC_NUMBUF];
1436 long nice;
1437 int err;
1438
1439 memset(buffer, 0, sizeof(buffer));
1440 if (count > sizeof(buffer) - 1)
1441 count = sizeof(buffer) - 1;
1442 if (copy_from_user(buffer, buf, count))
1443 return -EFAULT;
1444
1445 err = strict_strtol(strstrip(buffer), 0, &nice);
1446 if (err)
1447 return -EINVAL;
1448
1449 p = get_proc_task(inode);
1450 if (!p)
1451 return -ESRCH;
1452
1453 err = nice;
1454 err = proc_sched_autogroup_set_nice(p, &err);
1455 if (err)
1456 count = err;
1457
1458 put_task_struct(p);
1459
1460 return count;
1461}
1462
1463static int sched_autogroup_open(struct inode *inode, struct file *filp)
1464{
1465 int ret;
1466
1467 ret = single_open(filp, sched_autogroup_show, NULL);
1468 if (!ret) {
1469 struct seq_file *m = filp->private_data;
1470
1471 m->private = inode;
1472 }
1473 return ret;
1474}
1475
1476static const struct file_operations proc_pid_sched_autogroup_operations = {
1477 .open = sched_autogroup_open,
1478 .read = seq_read,
1479 .write = sched_autogroup_write,
1480 .llseek = seq_lseek,
1481 .release = single_release,
1482};
1483
1484#endif /* CONFIG_SCHED_AUTOGROUP */
1485
1410static ssize_t comm_write(struct file *file, const char __user *buf, 1486static ssize_t comm_write(struct file *file, const char __user *buf,
1411 size_t count, loff_t *offset) 1487 size_t count, loff_t *offset)
1412{ 1488{
@@ -1574,7 +1650,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
1574 if (!tmp) 1650 if (!tmp)
1575 return -ENOMEM; 1651 return -ENOMEM;
1576 1652
1577 pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE); 1653 pathname = d_path(path, tmp, PAGE_SIZE);
1578 len = PTR_ERR(pathname); 1654 len = PTR_ERR(pathname);
1579 if (IS_ERR(pathname)) 1655 if (IS_ERR(pathname))
1580 goto out; 1656 goto out;
@@ -1719,10 +1795,16 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat
1719 */ 1795 */
1720static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1796static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1721{ 1797{
1722 struct inode *inode = dentry->d_inode; 1798 struct inode *inode;
1723 struct task_struct *task = get_proc_task(inode); 1799 struct task_struct *task;
1724 const struct cred *cred; 1800 const struct cred *cred;
1725 1801
1802 if (nd && nd->flags & LOOKUP_RCU)
1803 return -ECHILD;
1804
1805 inode = dentry->d_inode;
1806 task = get_proc_task(inode);
1807
1726 if (task) { 1808 if (task) {
1727 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || 1809 if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
1728 task_dumpable(task)) { 1810 task_dumpable(task)) {
@@ -1744,7 +1826,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1744 return 0; 1826 return 0;
1745} 1827}
1746 1828
1747static int pid_delete_dentry(struct dentry * dentry) 1829static int pid_delete_dentry(const struct dentry * dentry)
1748{ 1830{
1749 /* Is the task we represent dead? 1831 /* Is the task we represent dead?
1750 * If so, then don't put the dentry on the lru list, 1832 * If so, then don't put the dentry on the lru list,
@@ -1888,12 +1970,19 @@ static int proc_fd_link(struct inode *inode, struct path *path)
1888 1970
1889static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1971static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1890{ 1972{
1891 struct inode *inode = dentry->d_inode; 1973 struct inode *inode;
1892 struct task_struct *task = get_proc_task(inode); 1974 struct task_struct *task;
1893 int fd = proc_fd(inode); 1975 int fd;
1894 struct files_struct *files; 1976 struct files_struct *files;
1895 const struct cred *cred; 1977 const struct cred *cred;
1896 1978
1979 if (nd && nd->flags & LOOKUP_RCU)
1980 return -ECHILD;
1981
1982 inode = dentry->d_inode;
1983 task = get_proc_task(inode);
1984 fd = proc_fd(inode);
1985
1897 if (task) { 1986 if (task) {
1898 files = get_files_struct(task); 1987 files = get_files_struct(task);
1899 if (files) { 1988 if (files) {
@@ -1969,7 +2058,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1969 inode->i_op = &proc_pid_link_inode_operations; 2058 inode->i_op = &proc_pid_link_inode_operations;
1970 inode->i_size = 64; 2059 inode->i_size = 64;
1971 ei->op.proc_get_link = proc_fd_link; 2060 ei->op.proc_get_link = proc_fd_link;
1972 dentry->d_op = &tid_fd_dentry_operations; 2061 d_set_d_op(dentry, &tid_fd_dentry_operations);
1973 d_add(dentry, inode); 2062 d_add(dentry, inode);
1974 /* Close the race of the process dying before we return the dentry */ 2063 /* Close the race of the process dying before we return the dentry */
1975 if (tid_fd_revalidate(dentry, NULL)) 2064 if (tid_fd_revalidate(dentry, NULL))
@@ -2101,11 +2190,13 @@ static const struct file_operations proc_fd_operations = {
2101 * /proc/pid/fd needs a special permission handler so that a process can still 2190 * /proc/pid/fd needs a special permission handler so that a process can still
2102 * access /proc/self/fd after it has executed a setuid(). 2191 * access /proc/self/fd after it has executed a setuid().
2103 */ 2192 */
2104static int proc_fd_permission(struct inode *inode, int mask) 2193static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags)
2105{ 2194{
2106 int rv; 2195 int rv;
2107 2196
2108 rv = generic_permission(inode, mask, NULL); 2197 if (flags & IPERM_FLAG_RCU)
2198 return -ECHILD;
2199 rv = generic_permission(inode, mask, flags, NULL);
2109 if (rv == 0) 2200 if (rv == 0)
2110 return 0; 2201 return 0;
2111 if (task_pid(current) == proc_pid(inode)) 2202 if (task_pid(current) == proc_pid(inode))
@@ -2137,7 +2228,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2137 ei->fd = fd; 2228 ei->fd = fd;
2138 inode->i_mode = S_IFREG | S_IRUSR; 2229 inode->i_mode = S_IFREG | S_IRUSR;
2139 inode->i_fop = &proc_fdinfo_file_operations; 2230 inode->i_fop = &proc_fdinfo_file_operations;
2140 dentry->d_op = &tid_fd_dentry_operations; 2231 d_set_d_op(dentry, &tid_fd_dentry_operations);
2141 d_add(dentry, inode); 2232 d_add(dentry, inode);
2142 /* Close the race of the process dying before we return the dentry */ 2233 /* Close the race of the process dying before we return the dentry */
2143 if (tid_fd_revalidate(dentry, NULL)) 2234 if (tid_fd_revalidate(dentry, NULL))
@@ -2196,7 +2287,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2196 if (p->fop) 2287 if (p->fop)
2197 inode->i_fop = p->fop; 2288 inode->i_fop = p->fop;
2198 ei->op = p->op; 2289 ei->op = p->op;
2199 dentry->d_op = &pid_dentry_operations; 2290 d_set_d_op(dentry, &pid_dentry_operations);
2200 d_add(dentry, inode); 2291 d_add(dentry, inode);
2201 /* Close the race of the process dying before we return the dentry */ 2292 /* Close the race of the process dying before we return the dentry */
2202 if (pid_revalidate(dentry, NULL)) 2293 if (pid_revalidate(dentry, NULL))
@@ -2563,8 +2654,14 @@ static const struct pid_entry proc_base_stuff[] = {
2563 */ 2654 */
2564static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd) 2655static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2565{ 2656{
2566 struct inode *inode = dentry->d_inode; 2657 struct inode *inode;
2567 struct task_struct *task = get_proc_task(inode); 2658 struct task_struct *task;
2659
2660 if (nd->flags & LOOKUP_RCU)
2661 return -ECHILD;
2662
2663 inode = dentry->d_inode;
2664 task = get_proc_task(inode);
2568 if (task) { 2665 if (task) {
2569 put_task_struct(task); 2666 put_task_struct(task);
2570 return 1; 2667 return 1;
@@ -2615,7 +2712,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2615 if (p->fop) 2712 if (p->fop)
2616 inode->i_fop = p->fop; 2713 inode->i_fop = p->fop;
2617 ei->op = p->op; 2714 ei->op = p->op;
2618 dentry->d_op = &proc_base_dentry_operations; 2715 d_set_d_op(dentry, &proc_base_dentry_operations);
2619 d_add(dentry, inode); 2716 d_add(dentry, inode);
2620 error = NULL; 2717 error = NULL;
2621out: 2718out:
@@ -2733,6 +2830,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2733#ifdef CONFIG_SCHED_DEBUG 2830#ifdef CONFIG_SCHED_DEBUG
2734 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2831 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
2735#endif 2832#endif
2833#ifdef CONFIG_SCHED_AUTOGROUP
2834 REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
2835#endif
2736 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2836 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2737#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2837#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2738 INF("syscall", S_IRUSR, proc_pid_syscall), 2838 INF("syscall", S_IRUSR, proc_pid_syscall),
@@ -2926,7 +3026,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
2926 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff, 3026 inode->i_nlink = 2 + pid_entry_count_dirs(tgid_base_stuff,
2927 ARRAY_SIZE(tgid_base_stuff)); 3027 ARRAY_SIZE(tgid_base_stuff));
2928 3028
2929 dentry->d_op = &pid_dentry_operations; 3029 d_set_d_op(dentry, &pid_dentry_operations);
2930 3030
2931 d_add(dentry, inode); 3031 d_add(dentry, inode);
2932 /* Close the race of the process dying before we return the dentry */ 3032 /* Close the race of the process dying before we return the dentry */
@@ -3169,7 +3269,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3169 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff, 3269 inode->i_nlink = 2 + pid_entry_count_dirs(tid_base_stuff,
3170 ARRAY_SIZE(tid_base_stuff)); 3270 ARRAY_SIZE(tid_base_stuff));
3171 3271
3172 dentry->d_op = &pid_dentry_operations; 3272 d_set_d_op(dentry, &pid_dentry_operations);
3173 3273
3174 d_add(dentry, inode); 3274 d_add(dentry, inode);
3175 /* Close the race of the process dying before we return the dentry */ 3275 /* Close the race of the process dying before we return the dentry */
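Editor's note: among the fs/proc/base.c changes above is a new per-task file, /proc/<pid>/autogroup. Reading it shows the task's autogroup via proc_sched_autogroup_show_task(); writing an integer adjusts the group's nice value through proc_sched_autogroup_set_nice(). A small userspace sketch of driving that interface, assuming a kernel built with CONFIG_SCHED_AUTOGROUP and with most error handling trimmed:

/* Userspace sketch: read a task's autogroup and renice it via the new
 * /proc/<pid>/autogroup file.  Assumes CONFIG_SCHED_AUTOGROUP. */
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char path[64], line[128];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%d/autogroup", getpid());

	f = fopen(path, "r");
	if (!f) {
		perror(path);	/* e.g. kernel without autogroup support */
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("before: %s", line);
	fclose(f);

	/* The write handler parses a nice value for the task's autogroup. */
	f = fopen(path, "w");
	if (f) {
		fprintf(f, "5\n");
		fclose(f);
	}

	f = fopen(path, "r");
	if (f) {
		if (fgets(line, sizeof(line), f))
			printf("after:  %s", line);
		fclose(f);
	}
	return 0;
}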
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index dd29f0337661..f766be29d2c7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -400,7 +400,7 @@ static const struct inode_operations proc_link_inode_operations = {
400 * smarter: we could keep a "volatile" flag in the 400 * smarter: we could keep a "volatile" flag in the
401 * inode to indicate which ones to keep. 401 * inode to indicate which ones to keep.
402 */ 402 */
403static int proc_delete_dentry(struct dentry * dentry) 403static int proc_delete_dentry(const struct dentry * dentry)
404{ 404{
405 return 1; 405 return 1;
406} 406}
@@ -439,7 +439,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
439out_unlock: 439out_unlock:
440 440
441 if (inode) { 441 if (inode) {
442 dentry->d_op = &proc_dentry_operations; 442 d_set_d_op(dentry, &proc_dentry_operations);
443 d_add(dentry, inode); 443 d_add(dentry, inode);
444 return NULL; 444 return NULL;
445 } 445 }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9c2b5f484879..6bcb926b101b 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -16,7 +16,6 @@
16#include <linux/limits.h> 16#include <linux/limits.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp_lock.h>
20#include <linux/sysctl.h> 19#include <linux/sysctl.h>
21#include <linux/slab.h> 20#include <linux/slab.h>
22 21
@@ -66,11 +65,18 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
66 return inode; 65 return inode;
67} 66}
68 67
69static void proc_destroy_inode(struct inode *inode) 68static void proc_i_callback(struct rcu_head *head)
70{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
71 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 72 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
72} 73}
73 74
75static void proc_destroy_inode(struct inode *inode)
76{
77 call_rcu(&inode->i_rcu, proc_i_callback);
78}
79
74static void init_once(void *foo) 80static void init_once(void *foo)
75{ 81{
76 struct proc_inode *ei = (struct proc_inode *) foo; 82 struct proc_inode *ei = (struct proc_inode *) foo;
diff --git a/fs/proc/proc_console.c b/fs/proc/proc_console.c
new file mode 100644
index 000000000000..8a707609f528
--- /dev/null
+++ b/fs/proc/proc_console.c
@@ -0,0 +1,114 @@
1/*
2 * Copyright (c) 2010 Werner Fink, Jiri Slaby
3 *
4 * Licensed under GPLv2
5 */
6
7#include <linux/console.h>
8#include <linux/kernel.h>
9#include <linux/proc_fs.h>
10#include <linux/seq_file.h>
11#include <linux/tty_driver.h>
12
13/*
14 * This is handler for /proc/consoles
15 */
16static int show_console_dev(struct seq_file *m, void *v)
17{
18 static const struct {
19 short flag;
20 char name;
21 } con_flags[] = {
22 { CON_ENABLED, 'E' },
23 { CON_CONSDEV, 'C' },
24 { CON_BOOT, 'B' },
25 { CON_PRINTBUFFER, 'p' },
26 { CON_BRL, 'b' },
27 { CON_ANYTIME, 'a' },
28 };
29 char flags[ARRAY_SIZE(con_flags) + 1];
30 struct console *con = v;
31 unsigned int a;
32 int len;
33 dev_t dev = 0;
34
35 if (con->device) {
36 const struct tty_driver *driver;
37 int index;
38 driver = con->device(con, &index);
39 if (driver) {
40 dev = MKDEV(driver->major, driver->minor_start);
41 dev += index;
42 }
43 }
44
45 for (a = 0; a < ARRAY_SIZE(con_flags); a++)
46 flags[a] = (con->flags & con_flags[a].flag) ?
47 con_flags[a].name : ' ';
48 flags[a] = 0;
49
50 seq_printf(m, "%s%d%n", con->name, con->index, &len);
51 len = 21 - len;
52 if (len < 1)
53 len = 1;
54 seq_printf(m, "%*c%c%c%c (%s)", len, ' ', con->read ? 'R' : '-',
55 con->write ? 'W' : '-', con->unblank ? 'U' : '-',
56 flags);
57 if (dev)
58 seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev));
59
60 seq_printf(m, "\n");
61
62 return 0;
63}
64
65static void *c_start(struct seq_file *m, loff_t *pos)
66{
67 struct console *con;
68 loff_t off = 0;
69
70 acquire_console_sem();
71 for_each_console(con)
72 if (off++ == *pos)
73 break;
74
75 return con;
76}
77
78static void *c_next(struct seq_file *m, void *v, loff_t *pos)
79{
80 struct console *con = v;
81 ++*pos;
82 return con->next;
83}
84
85static void c_stop(struct seq_file *m, void *v)
86{
87 release_console_sem();
88}
89
90static const struct seq_operations consoles_op = {
91 .start = c_start,
92 .next = c_next,
93 .stop = c_stop,
94 .show = show_console_dev
95};
96
97static int consoles_open(struct inode *inode, struct file *file)
98{
99 return seq_open(file, &consoles_op);
100}
101
102static const struct file_operations proc_consoles_operations = {
103 .open = consoles_open,
104 .read = seq_read,
105 .llseek = seq_lseek,
106 .release = seq_release,
107};
108
109static int register_proc_consoles(void)
110{
111 proc_create("consoles", 0, NULL, &proc_consoles_operations);
112 return 0;
113}
114module_init(register_proc_consoles);
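Editor's note: the new fs/proc/proc_console.c above walks the registered console list under the console semaphore and prints one line per console: driver name and index, R/W/U capability characters, a flag string built from con_flags[] (for example 'E' for CON_ENABLED), and the device's major:minor when one exists. A trivial userspace consumer, for illustration only; the exact layout is whatever show_console_dev() emits:

/* Minimal reader for /proc/consoles: echo each line and note whether
 * the 'E' (CON_ENABLED) flag appears in the parenthesised flag field. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/consoles", "r");

	if (!f) {
		perror("/proc/consoles");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		char *flags = strchr(line, '(');
		int enabled = flags && strchr(flags, 'E');

		printf("%s%s", enabled ? "[enabled]  " : "[disabled] ", line);
	}
	fclose(f);
	return 0;
}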
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b652cb00906b..09a1f92a34ef 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -5,6 +5,7 @@
5#include <linux/sysctl.h> 5#include <linux/sysctl.h>
6#include <linux/proc_fs.h> 6#include <linux/proc_fs.h>
7#include <linux/security.h> 7#include <linux/security.h>
8#include <linux/namei.h>
8#include "internal.h" 9#include "internal.h"
9 10
10static const struct dentry_operations proc_sys_dentry_operations; 11static const struct dentry_operations proc_sys_dentry_operations;
@@ -120,7 +121,7 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
120 goto out; 121 goto out;
121 122
122 err = NULL; 123 err = NULL;
123 dentry->d_op = &proc_sys_dentry_operations; 124 d_set_d_op(dentry, &proc_sys_dentry_operations);
124 d_add(dentry, inode); 125 d_add(dentry, inode);
125 126
126out: 127out:
@@ -201,7 +202,7 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
201 dput(child); 202 dput(child);
202 return -ENOMEM; 203 return -ENOMEM;
203 } else { 204 } else {
204 child->d_op = &proc_sys_dentry_operations; 205 d_set_d_op(child, &proc_sys_dentry_operations);
205 d_add(child, inode); 206 d_add(child, inode);
206 } 207 }
207 } else { 208 } else {
@@ -294,7 +295,7 @@ out:
294 return ret; 295 return ret;
295} 296}
296 297
297static int proc_sys_permission(struct inode *inode, int mask) 298static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
298{ 299{
299 /* 300 /*
300 * sysctl entries that are not writeable, 301 * sysctl entries that are not writeable,
@@ -304,6 +305,9 @@ static int proc_sys_permission(struct inode *inode, int mask)
304 struct ctl_table *table; 305 struct ctl_table *table;
305 int error; 306 int error;
306 307
308 if (flags & IPERM_FLAG_RCU)
309 return -ECHILD;
310
307 /* Executable files are not allowed under /proc/sys/ */ 311 /* Executable files are not allowed under /proc/sys/ */
308 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) 312 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
309 return -EACCES; 313 return -EACCES;
@@ -389,23 +393,30 @@ static const struct inode_operations proc_sys_dir_operations = {
389 393
390static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 394static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
391{ 395{
396 if (nd->flags & LOOKUP_RCU)
397 return -ECHILD;
392 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 398 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
393} 399}
394 400
395static int proc_sys_delete(struct dentry *dentry) 401static int proc_sys_delete(const struct dentry *dentry)
396{ 402{
397 return !!PROC_I(dentry->d_inode)->sysctl->unregistering; 403 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
398} 404}
399 405
400static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, 406static int proc_sys_compare(const struct dentry *parent,
401 struct qstr *name) 407 const struct inode *pinode,
408 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name)
402{ 410{
403 struct dentry *dentry = container_of(qstr, struct dentry, d_name); 411 /* Although proc doesn't have negative dentries, rcu-walk means
404 if (qstr->len != name->len) 412 * that inode here can be NULL */
413 if (!inode)
414 return 0;
415 if (name->len != len)
405 return 1; 416 return 1;
406 if (memcmp(qstr->name, name->name, name->len)) 417 if (memcmp(name->name, str, len))
407 return 1; 418 return 1;
408 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); 419 return !sysctl_is_seen(PROC_I(inode)->sysctl);
409} 420}
410 421
411static const struct dentry_operations proc_sys_dentry_operations = { 422static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index da6b01d70f01..c126c83b9a45 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -706,6 +706,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
706 * skip over unmapped regions. 706 * skip over unmapped regions.
707 */ 707 */
708#define PAGEMAP_WALK_SIZE (PMD_SIZE) 708#define PAGEMAP_WALK_SIZE (PMD_SIZE)
709#define PAGEMAP_WALK_MASK (PMD_MASK)
709static ssize_t pagemap_read(struct file *file, char __user *buf, 710static ssize_t pagemap_read(struct file *file, char __user *buf,
710 size_t count, loff_t *ppos) 711 size_t count, loff_t *ppos)
711{ 712{
@@ -776,7 +777,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
776 unsigned long end; 777 unsigned long end;
777 778
778 pm.pos = 0; 779 pm.pos = 0;
779 end = start_vaddr + PAGEMAP_WALK_SIZE; 780 end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
780 /* overflow ? */ 781 /* overflow ? */
781 if (end < start_vaddr || end > end_vaddr) 782 if (end < start_vaddr || end > end_vaddr)
782 end = end_vaddr; 783 end = end_vaddr;
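Editor's note: the pagemap fix above clamps each walk chunk to a PMD boundary. Instead of ending the chunk at start_vaddr + PAGEMAP_WALK_SIZE, the end is rounded down with PAGEMAP_WALK_MASK, so an unaligned start can no longer produce a chunk that straddles two PMDs. A tiny arithmetic illustration; 2 MiB is only an example PMD size, the real PMD_SIZE/PMD_MASK are architecture-dependent:

/* Worked example of the PAGEMAP_WALK_MASK rounding added above. */
#include <stdio.h>

#define PMD_SIZE (2UL * 1024 * 1024)	/* illustrative only */
#define PMD_MASK (~(PMD_SIZE - 1))

int main(void)
{
	unsigned long start = 0x00301000;	/* deliberately unaligned */
	unsigned long old_end = start + PMD_SIZE;
	unsigned long new_end = (start + PMD_SIZE) & PMD_MASK;

	printf("start   = %#lx\n", start);
	printf("old end = %#lx (crosses the 0x400000 PMD boundary)\n", old_end);
	printf("new end = %#lx (clamped to the PMD boundary)\n", new_end);
	return 0;
}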
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 2367fb3f70bc..74802bc5ded9 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -499,7 +499,7 @@ static int __init parse_crash_elf64_headers(void)
499 /* Do some basic Verification. */ 499 /* Do some basic Verification. */
500 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 || 500 if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
501 (ehdr.e_type != ET_CORE) || 501 (ehdr.e_type != ET_CORE) ||
502 !vmcore_elf_check_arch(&ehdr) || 502 !vmcore_elf64_check_arch(&ehdr) ||
503 ehdr.e_ident[EI_CLASS] != ELFCLASS64 || 503 ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
504 ehdr.e_ident[EI_VERSION] != EV_CURRENT || 504 ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
505 ehdr.e_version != EV_CURRENT || 505 ehdr.e_version != EV_CURRENT ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fcada42f1aa3..e63b4171d583 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -425,11 +425,18 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
425 return &ei->vfs_inode; 425 return &ei->vfs_inode;
426} 426}
427 427
428static void qnx4_destroy_inode(struct inode *inode) 428static void qnx4_i_callback(struct rcu_head *head)
429{ 429{
430 struct inode *inode = container_of(head, struct inode, i_rcu);
431 INIT_LIST_HEAD(&inode->i_dentry);
430 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); 432 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
431} 433}
432 434
435static void qnx4_destroy_inode(struct inode *inode)
436{
437 call_rcu(&inode->i_rcu, qnx4_i_callback);
438}
439
433static void init_once(void *foo) 440static void init_once(void *foo)
434{ 441{
435 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; 442 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
diff --git a/fs/read_write.c b/fs/read_write.c
index 431a0ed610c8..5d431bacbea9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
9#include <linux/fcntl.h> 9#include <linux/fcntl.h>
10#include <linux/file.h> 10#include <linux/file.h>
11#include <linux/uio.h> 11#include <linux/uio.h>
12#include <linux/smp_lock.h>
13#include <linux/fsnotify.h> 12#include <linux/fsnotify.h>
14#include <linux/security.h> 13#include <linux/security.h>
15#include <linux/module.h> 14#include <linux/module.h>
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 41656d40dc5c..0bae036831e2 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -8,7 +8,6 @@
8#include <linux/reiserfs_acl.h> 8#include <linux/reiserfs_acl.h>
9#include <linux/reiserfs_xattr.h> 9#include <linux/reiserfs_xattr.h>
10#include <linux/exportfs.h> 10#include <linux/exportfs.h>
11#include <linux/smp_lock.h>
12#include <linux/pagemap.h> 11#include <linux/pagemap.h>
13#include <linux/highmem.h> 12#include <linux/highmem.h>
14#include <linux/slab.h> 13#include <linux/slab.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index adf22b485cea..79265fdc317a 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,7 +9,6 @@
9#include <linux/time.h> 9#include <linux/time.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
11#include <linux/pagemap.h> 11#include <linux/pagemap.h>
12#include <linux/smp_lock.h>
13#include <linux/compat.h> 12#include <linux/compat.h>
14 13
15/* 14/*
@@ -184,12 +183,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
184 return 0; 183 return 0;
185 } 184 }
186 185
187 /* we need to make sure nobody is changing the file size beneath
188 ** us
189 */
190 reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
191 depth = reiserfs_write_lock_once(inode->i_sb); 186 depth = reiserfs_write_lock_once(inode->i_sb);
192 187
188 /* we need to make sure nobody is changing the file size beneath us */
189 reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
190
193 write_from = inode->i_size & (blocksize - 1); 191 write_from = inode->i_size & (blocksize - 1);
194 /* if we are on a block boundary, we are already unpacked. */ 192 /* if we are on a block boundary, we are already unpacked. */
195 if (write_from == 0) { 193 if (write_from == 0) {
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 076c8b194682..d31bce1a9f90 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -43,7 +43,6 @@
43#include <linux/fcntl.h> 43#include <linux/fcntl.h>
44#include <linux/stat.h> 44#include <linux/stat.h>
45#include <linux/string.h> 45#include <linux/string.h>
46#include <linux/smp_lock.h>
47#include <linux/buffer_head.h> 46#include <linux/buffer_head.h>
48#include <linux/workqueue.h> 47#include <linux/workqueue.h>
49#include <linux/writeback.h> 48#include <linux/writeback.h>
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3bf7a6457f4d..2575682a9ead 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,7 +28,6 @@
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/crc32.h> 30#include <linux/crc32.h>
31#include <linux/smp_lock.h>
32 31
33struct file_system_type reiserfs_fs_type; 32struct file_system_type reiserfs_fs_type;
34 33
@@ -530,11 +529,18 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
530 return &ei->vfs_inode; 529 return &ei->vfs_inode;
531} 530}
532 531
533static void reiserfs_destroy_inode(struct inode *inode) 532static void reiserfs_i_callback(struct rcu_head *head)
534{ 533{
534 struct inode *inode = container_of(head, struct inode, i_rcu);
535 INIT_LIST_HEAD(&inode->i_dentry);
535 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 536 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
536} 537}
537 538
539static void reiserfs_destroy_inode(struct inode *inode)
540{
541 call_rcu(&inode->i_rcu, reiserfs_i_callback);
542}
543
538static void init_once(void *foo) 544static void init_once(void *foo)
539{ 545{
540 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; 546 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 5d04a7828e7a..3cfb2e933644 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -870,11 +870,14 @@ out:
870 return err; 870 return err;
871} 871}
872 872
873static int reiserfs_check_acl(struct inode *inode, int mask) 873static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags)
874{ 874{
875 struct posix_acl *acl; 875 struct posix_acl *acl;
876 int error = -EAGAIN; /* do regular unix permission checks by default */ 876 int error = -EAGAIN; /* do regular unix permission checks by default */
877 877
878 if (flags & IPERM_FLAG_RCU)
879 return -ECHILD;
880
878 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 881 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
879 882
880 if (acl) { 883 if (acl) {
@@ -951,8 +954,10 @@ static int xattr_mount_check(struct super_block *s)
951 return 0; 954 return 0;
952} 955}
953 956
954int reiserfs_permission(struct inode *inode, int mask) 957int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
955{ 958{
959 if (flags & IPERM_FLAG_RCU)
960 return -ECHILD;
956 /* 961 /*
957 * We don't do permission checks on the internal objects. 962 * We don't do permission checks on the internal objects.
958 * Permissions are determined by the "owning" object. 963 * Permissions are determined by the "owning" object.
@@ -965,13 +970,16 @@ int reiserfs_permission(struct inode *inode, int mask)
965 * Stat data v1 doesn't support ACLs. 970 * Stat data v1 doesn't support ACLs.
966 */ 971 */
967 if (get_inode_sd_version(inode) != STAT_DATA_V1) 972 if (get_inode_sd_version(inode) != STAT_DATA_V1)
968 return generic_permission(inode, mask, reiserfs_check_acl); 973 return generic_permission(inode, mask, flags,
974 reiserfs_check_acl);
969#endif 975#endif
970 return generic_permission(inode, mask, NULL); 976 return generic_permission(inode, mask, flags, NULL);
971} 977}
972 978
973static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
974{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
975 return -EPERM; 983 return -EPERM;
976} 984}
977 985
@@ -990,7 +998,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
990 strlen(PRIVROOT_NAME)); 998 strlen(PRIVROOT_NAME));
991 if (!IS_ERR(dentry)) { 999 if (!IS_ERR(dentry)) {
992 REISERFS_SB(s)->priv_root = dentry; 1000 REISERFS_SB(s)->priv_root = dentry;
993 dentry->d_op = &xattr_lookup_poison_ops; 1001 d_set_d_op(dentry, &xattr_lookup_poison_ops);
994 if (dentry->d_inode) 1002 if (dentry->d_inode)
995 dentry->d_inode->i_flags |= S_PRIVATE; 1003 dentry->d_inode->i_flags |= S_PRIVATE;
996 } else 1004 } else
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 536d697a8a28..90d2fcb67a31 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -472,7 +472,9 @@ int reiserfs_acl_chmod(struct inode *inode)
472 struct reiserfs_transaction_handle th; 472 struct reiserfs_transaction_handle th;
473 size_t size = reiserfs_xattr_nblocks(inode, 473 size_t size = reiserfs_xattr_nblocks(inode,
474 reiserfs_acl_size(clone->a_count)); 474 reiserfs_acl_size(clone->a_count));
475 reiserfs_write_lock(inode->i_sb); 475 int depth;
476
477 depth = reiserfs_write_lock_once(inode->i_sb);
476 error = journal_begin(&th, inode->i_sb, size * 2); 478 error = journal_begin(&th, inode->i_sb, size * 2);
477 if (!error) { 479 if (!error) {
478 int error2; 480 int error2;
@@ -482,7 +484,7 @@ int reiserfs_acl_chmod(struct inode *inode)
482 if (error2) 484 if (error2)
483 error = error2; 485 error = error2;
484 } 486 }
485 reiserfs_write_unlock(inode->i_sb); 487 reiserfs_write_unlock_once(inode->i_sb, depth);
486 } 488 }
487 posix_acl_release(clone); 489 posix_acl_release(clone);
488 return error; 490 return error;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 6647f90e55cd..2305e3121cb1 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -400,11 +400,18 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
400/* 400/*
401 * return a spent inode to the slab cache 401 * return a spent inode to the slab cache
402 */ 402 */
403static void romfs_destroy_inode(struct inode *inode) 403static void romfs_i_callback(struct rcu_head *head)
404{ 404{
405 struct inode *inode = container_of(head, struct inode, i_rcu);
406 INIT_LIST_HEAD(&inode->i_dentry);
405 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 407 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
406} 408}
407 409
410static void romfs_destroy_inode(struct inode *inode)
411{
412 call_rcu(&inode->i_rcu, romfs_i_callback);
413}
414
408/* 415/*
409 * get filesystem statistics 416 * get filesystem statistics
410 */ 417 */
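
romfs is one of many filesystems converted here from freeing the inode directly in ->destroy_inode to deferring the free through call_rcu(), so that lockless (RCU-walk) lookups dereferencing a cached inode pointer never see its memory recycled before a grace period has passed. Reduced to its essentials, the conversion looks like this (foofs_* names are placeholders):

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

/* Placeholder per-filesystem inode container and its slab cache. */
struct foofs_inode_info {
        struct inode vfs_inode;
};
static struct kmem_cache *foofs_inode_cachep;

static inline struct foofs_inode_info *FOOFS_I(struct inode *inode)
{
        return container_of(inode, struct foofs_inode_info, vfs_inode);
}

/*
 * Runs only after an RCU grace period, so no lockless walker can still
 * be looking at this inode.  The callbacks in this diff reinitialise
 * i_dentry before freeing; mirror that here.
 */
static void foofs_i_callback(struct rcu_head *head)
{
        struct inode *inode = container_of(head, struct inode, i_rcu);

        INIT_LIST_HEAD(&inode->i_dentry);
        kmem_cache_free(foofs_inode_cachep, FOOFS_I(inode));
}

/* ->destroy_inode now only queues the free; the release is deferred. */
static void foofs_destroy_inode(struct inode *inode)
{
        call_rcu(&inode->i_rcu, foofs_i_callback);
}
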
diff --git a/fs/splice.c b/fs/splice.c
index 8f1dfaecc8f0..ce2f02579e35 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1311,18 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
1311static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 1311static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
1312 struct pipe_inode_info *opipe, 1312 struct pipe_inode_info *opipe,
1313 size_t len, unsigned int flags); 1313 size_t len, unsigned int flags);
1314/*
1315 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
1316 * location, so checking ->i_pipe is not enough to verify that this is a
1317 * pipe.
1318 */
1319static inline struct pipe_inode_info *pipe_info(struct inode *inode)
1320{
1321 if (S_ISFIFO(inode->i_mode))
1322 return inode->i_pipe;
1323
1324 return NULL;
1325}
1326 1314
1327/* 1315/*
1328 * Determine where to splice to/from. 1316 * Determine where to splice to/from.
@@ -1336,8 +1324,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1336 loff_t offset, *off; 1324 loff_t offset, *off;
1337 long ret; 1325 long ret;
1338 1326
1339 ipipe = pipe_info(in->f_path.dentry->d_inode); 1327 ipipe = get_pipe_info(in);
1340 opipe = pipe_info(out->f_path.dentry->d_inode); 1328 opipe = get_pipe_info(out);
1341 1329
1342 if (ipipe && opipe) { 1330 if (ipipe && opipe) {
1343 if (off_in || off_out) 1331 if (off_in || off_out)
@@ -1555,7 +1543,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
1555 int error; 1543 int error;
1556 long ret; 1544 long ret;
1557 1545
1558 pipe = pipe_info(file->f_path.dentry->d_inode); 1546 pipe = get_pipe_info(file);
1559 if (!pipe) 1547 if (!pipe)
1560 return -EBADF; 1548 return -EBADF;
1561 1549
@@ -1642,7 +1630,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
1642 }; 1630 };
1643 long ret; 1631 long ret;
1644 1632
1645 pipe = pipe_info(file->f_path.dentry->d_inode); 1633 pipe = get_pipe_info(file);
1646 if (!pipe) 1634 if (!pipe)
1647 return -EBADF; 1635 return -EBADF;
1648 1636
@@ -2022,8 +2010,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
2022static long do_tee(struct file *in, struct file *out, size_t len, 2010static long do_tee(struct file *in, struct file *out, size_t len,
2023 unsigned int flags) 2011 unsigned int flags)
2024{ 2012{
2025 struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode); 2013 struct pipe_inode_info *ipipe = get_pipe_info(in);
2026 struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode); 2014 struct pipe_inode_info *opipe = get_pipe_info(out);
2027 int ret = -EINVAL; 2015 int ret = -EINVAL;
2028 2016
2029 /* 2017 /*
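
The splice.c hunks drop the local pipe_info(inode) helper in favour of a shared get_pipe_info(file). The removed body above shows the logic that moves; presumably the shared helper wraps the same S_ISFIFO check behind a file-based interface, roughly as below (a sketch, not the verbatim header definition):

#include <linux/fs.h>
#include <linux/pipe_fs_i.h>

/*
 * Sketch of a file-based replacement for the removed pipe_info():
 * i_pipe shares storage with i_bdev/i_cdev, so the S_ISFIFO test is
 * what actually proves the inode is a pipe.
 */
static inline struct pipe_inode_info *get_pipe_info_sketch(struct file *file)
{
        struct inode *inode = file->f_path.dentry->d_inode;

        return S_ISFIFO(inode->i_mode) ? inode->i_pipe : NULL;
}
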
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 24de30ba34c1..20700b9f2b4c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -440,11 +440,18 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
440} 440}
441 441
442 442
443static void squashfs_destroy_inode(struct inode *inode) 443static void squashfs_i_callback(struct rcu_head *head)
444{ 444{
445 struct inode *inode = container_of(head, struct inode, i_rcu);
446 INIT_LIST_HEAD(&inode->i_dentry);
445 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); 447 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
446} 448}
447 449
450static void squashfs_destroy_inode(struct inode *inode)
451{
452 call_rcu(&inode->i_rcu, squashfs_i_callback);
453}
454
448 455
449static struct file_system_type squashfs_fs_type = { 456static struct file_system_type squashfs_fs_type = {
450 .owner = THIS_MODULE, 457 .owner = THIS_MODULE,
diff --git a/fs/super.c b/fs/super.c
index ca696155cd9a..823e061faa87 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -30,6 +30,7 @@
30#include <linux/idr.h> 30#include <linux/idr.h>
31#include <linux/mutex.h> 31#include <linux/mutex.h>
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/rculist_bl.h>
33#include "internal.h" 34#include "internal.h"
34 35
35 36
@@ -71,7 +72,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
71 INIT_LIST_HEAD(&s->s_files); 72 INIT_LIST_HEAD(&s->s_files);
72#endif 73#endif
73 INIT_LIST_HEAD(&s->s_instances); 74 INIT_LIST_HEAD(&s->s_instances);
74 INIT_HLIST_HEAD(&s->s_anon); 75 INIT_HLIST_BL_HEAD(&s->s_anon);
75 INIT_LIST_HEAD(&s->s_inodes); 76 INIT_LIST_HEAD(&s->s_inodes);
76 INIT_LIST_HEAD(&s->s_dentry_lru); 77 INIT_LIST_HEAD(&s->s_dentry_lru);
77 init_rwsem(&s->s_umount); 78 init_rwsem(&s->s_umount);
@@ -1139,7 +1140,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1139 return mnt; 1140 return mnt;
1140 1141
1141 err: 1142 err:
1142 mntput(mnt); 1143 mntput_long(mnt);
1143 return ERR_PTR(err); 1144 return ERR_PTR(err);
1144} 1145}
1145 1146
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 7e54bac8c4b0..ea9120a830d8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -231,7 +231,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
231 goto repeat; 231 goto repeat;
232} 232}
233 233
234static int sysfs_dentry_delete(struct dentry *dentry) 234static int sysfs_dentry_delete(const struct dentry *dentry)
235{ 235{
236 struct sysfs_dirent *sd = dentry->d_fsdata; 236 struct sysfs_dirent *sd = dentry->d_fsdata;
237 return !!(sd->s_flags & SYSFS_FLAG_REMOVED); 237 return !!(sd->s_flags & SYSFS_FLAG_REMOVED);
@@ -239,9 +239,13 @@ static int sysfs_dentry_delete(struct dentry *dentry)
239 239
240static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) 240static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
241{ 241{
242 struct sysfs_dirent *sd = dentry->d_fsdata; 242 struct sysfs_dirent *sd;
243 int is_dir; 243 int is_dir;
244 244
245 if (nd->flags & LOOKUP_RCU)
246 return -ECHILD;
247
248 sd = dentry->d_fsdata;
245 mutex_lock(&sysfs_mutex); 249 mutex_lock(&sysfs_mutex);
246 250
247 /* The sysfs dirent has been deleted */ 251 /* The sysfs dirent has been deleted */
@@ -701,7 +705,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
701 /* instantiate and hash dentry */ 705 /* instantiate and hash dentry */
702 ret = d_find_alias(inode); 706 ret = d_find_alias(inode);
703 if (!ret) { 707 if (!ret) {
704 dentry->d_op = &sysfs_dentry_ops; 708 d_set_d_op(dentry, &sysfs_dentry_ops);
705 dentry->d_fsdata = sysfs_get(sd); 709 dentry->d_fsdata = sysfs_get(sd);
706 d_add(dentry, inode); 710 d_add(dentry, inode);
707 } else { 711 } else {
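
Like the other filesystems touched by this patch, sysfs now installs dentry operations through d_set_d_op() rather than assigning ->d_op directly. Given the RCU-walk work elsewhere in the series, the accessor most likely exists so the dcache can note which operations a dentry actually provides and let the lockless lookup fast path test cached flags instead of chasing the d_op pointer. A speculative sketch of that idea (the flag handling is illustrative and may not match fs/dcache.c):

#include <linux/dcache.h>

/*
 * Speculative sketch: record "which ops does this dentry have?" in
 * d_flags so hot paths can branch on a flag test.  The real helper in
 * fs/dcache.c may do more (and may warn on re-setting ->d_op).
 */
static void d_set_d_op_sketch(struct dentry *dentry,
                              const struct dentry_operations *op)
{
        dentry->d_op = op;
        if (!op)
                return;
        if (op->d_hash)
                dentry->d_flags |= DCACHE_OP_HASH;
        if (op->d_compare)
                dentry->d_flags |= DCACHE_OP_COMPARE;
        if (op->d_revalidate)
                dentry->d_flags |= DCACHE_OP_REVALIDATE;
        if (op->d_delete)
                dentry->d_flags |= DCACHE_OP_DELETE;
}
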
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index cffb1fd8ba33..30ac27345586 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -348,13 +348,18 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
348 return -ENOENT; 348 return -ENOENT;
349} 349}
350 350
351int sysfs_permission(struct inode *inode, int mask) 351int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
352{ 352{
353 struct sysfs_dirent *sd = inode->i_private; 353 struct sysfs_dirent *sd;
354
355 if (flags & IPERM_FLAG_RCU)
356 return -ECHILD;
357
358 sd = inode->i_private;
354 359
355 mutex_lock(&sysfs_mutex); 360 mutex_lock(&sysfs_mutex);
356 sysfs_refresh_inode(sd, inode); 361 sysfs_refresh_inode(sd, inode);
357 mutex_unlock(&sysfs_mutex); 362 mutex_unlock(&sysfs_mutex);
358 363
359 return generic_permission(inode, mask, NULL); 364 return generic_permission(inode, mask, flags, NULL);
360} 365}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index d9be60a2e956..ffaaa816bfba 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -200,7 +200,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); 200struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
201void sysfs_evict_inode(struct inode *inode); 201void sysfs_evict_inode(struct inode *inode);
202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); 202int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
203int sysfs_permission(struct inode *inode, int mask); 203int sysfs_permission(struct inode *inode, int mask, unsigned int flags);
204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 204int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
205int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 205int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
206int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 206int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index de44d067b9e6..0630eb969a28 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -333,11 +333,18 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
333 return &si->vfs_inode; 333 return &si->vfs_inode;
334} 334}
335 335
336static void sysv_destroy_inode(struct inode *inode) 336static void sysv_i_callback(struct rcu_head *head)
337{ 337{
338 struct inode *inode = container_of(head, struct inode, i_rcu);
339 INIT_LIST_HEAD(&inode->i_dentry);
338 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); 340 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
339} 341}
340 342
343static void sysv_destroy_inode(struct inode *inode)
344{
345 call_rcu(&inode->i_rcu, sysv_i_callback);
346}
347
341static void init_once(void *p) 348static void init_once(void *p)
342{ 349{
343 struct sysv_inode_info *si = (struct sysv_inode_info *)p; 350 struct sysv_inode_info *si = (struct sysv_inode_info *)p;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e7f7d11cd0..b5e68da2db32 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -27,7 +27,8 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
27 return err; 27 return err;
28} 28}
29 29
30static int sysv_hash(struct dentry *dentry, struct qstr *qstr) 30static int sysv_hash(const struct dentry *dentry, const struct inode *inode,
31 struct qstr *qstr)
31{ 32{
32 /* Truncate the name in place, avoids having to define a compare 33 /* Truncate the name in place, avoids having to define a compare
33 function. */ 34 function. */
@@ -47,7 +48,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
47 struct inode * inode = NULL; 48 struct inode * inode = NULL;
48 ino_t ino; 49 ino_t ino;
49 50
50 dentry->d_op = dir->i_sb->s_root->d_op; 51 d_set_d_op(dentry, dir->i_sb->s_root->d_op);
51 if (dentry->d_name.len > SYSV_NAMELEN) 52 if (dentry->d_name.len > SYSV_NAMELEN)
52 return ERR_PTR(-ENAMETOOLONG); 53 return ERR_PTR(-ENAMETOOLONG);
53 ino = sysv_inode_by_name(dentry); 54 ino = sysv_inode_by_name(dentry);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index 3d9c62be0c10..76712aefc4ab 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -346,7 +346,7 @@ static int complete_read_super(struct super_block *sb, int silent, int size)
346 if (sbi->s_forced_ro) 346 if (sbi->s_forced_ro)
347 sb->s_flags |= MS_RDONLY; 347 sb->s_flags |= MS_RDONLY;
348 if (sbi->s_truncate) 348 if (sbi->s_truncate)
349 sb->s_root->d_op = &sysv_dentry_operations; 349 d_set_d_op(sb->s_root, &sysv_dentry_operations);
350 return 1; 350 return 1;
351} 351}
352 352
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 91fac54c70e3..6e11c2975dcf 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
272 return &ui->vfs_inode; 272 return &ui->vfs_inode;
273}; 273};
274 274
275static void ubifs_i_callback(struct rcu_head *head)
276{
277 struct inode *inode = container_of(head, struct inode, i_rcu);
278 struct ubifs_inode *ui = ubifs_inode(inode);
279 INIT_LIST_HEAD(&inode->i_dentry);
280 kmem_cache_free(ubifs_inode_slab, ui);
281}
282
275static void ubifs_destroy_inode(struct inode *inode) 283static void ubifs_destroy_inode(struct inode *inode)
276{ 284{
277 struct ubifs_inode *ui = ubifs_inode(inode); 285 struct ubifs_inode *ui = ubifs_inode(inode);
278 286
279 kfree(ui->data); 287 kfree(ui->data);
280 kmem_cache_free(ubifs_inode_slab, inode); 288 call_rcu(&inode->i_rcu, ubifs_i_callback);
281} 289}
282 290
283/* 291/*
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4a5c7c61836a..b539d53320fb 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -139,11 +139,18 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
139 return &ei->vfs_inode; 139 return &ei->vfs_inode;
140} 140}
141 141
142static void udf_destroy_inode(struct inode *inode) 142static void udf_i_callback(struct rcu_head *head)
143{ 143{
144 struct inode *inode = container_of(head, struct inode, i_rcu);
145 INIT_LIST_HEAD(&inode->i_dentry);
144 kmem_cache_free(udf_inode_cachep, UDF_I(inode)); 146 kmem_cache_free(udf_inode_cachep, UDF_I(inode));
145} 147}
146 148
149static void udf_destroy_inode(struct inode *inode)
150{
151 call_rcu(&inode->i_rcu, udf_i_callback);
152}
153
147static void init_once(void *foo) 154static void init_once(void *foo)
148{ 155{
149 struct udf_inode_info *ei = (struct udf_inode_info *)foo; 156 struct udf_inode_info *ei = (struct udf_inode_info *)foo;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c47daed56da..2c61ac5d4e48 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1412,11 +1412,18 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
1412 return &ei->vfs_inode; 1412 return &ei->vfs_inode;
1413} 1413}
1414 1414
1415static void ufs_destroy_inode(struct inode *inode) 1415static void ufs_i_callback(struct rcu_head *head)
1416{ 1416{
1417 struct inode *inode = container_of(head, struct inode, i_rcu);
1418 INIT_LIST_HEAD(&inode->i_dentry);
1417 kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); 1419 kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
1418} 1420}
1419 1421
1422static void ufs_destroy_inode(struct inode *inode)
1423{
1424 call_rcu(&inode->i_rcu, ufs_i_callback);
1425}
1426
1420static void init_once(void *foo) 1427static void init_once(void *foo)
1421{ 1428{
1422 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; 1429 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index b2771862fd3d..39f4f809bb68 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,12 +219,13 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219} 219}
220 220
221int 221int
222xfs_check_acl(struct inode *inode, int mask) 222xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
223{ 223{
224 struct xfs_inode *ip = XFS_I(inode); 224 struct xfs_inode *ip;
225 struct posix_acl *acl; 225 struct posix_acl *acl;
226 int error = -EAGAIN; 226 int error = -EAGAIN;
227 227
228 ip = XFS_I(inode);
228 trace_xfs_check_acl(ip); 229 trace_xfs_check_acl(ip);
229 230
230 /* 231 /*
@@ -234,6 +235,12 @@ xfs_check_acl(struct inode *inode, int mask)
234 if (!XFS_IFORK_Q(ip)) 235 if (!XFS_IFORK_Q(ip))
235 return -EAGAIN; 236 return -EAGAIN;
236 237
238 if (flags & IPERM_FLAG_RCU) {
239 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
240 return -ECHILD;
241 return -EAGAIN;
242 }
243
237 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); 244 acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
238 if (IS_ERR(acl)) 245 if (IS_ERR(acl))
239 return PTR_ERR(acl); 246 return PTR_ERR(acl);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c9af48fffcd7..691f61223ed6 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -934,7 +934,6 @@ xfs_aops_discard_page(
934 struct xfs_inode *ip = XFS_I(inode); 934 struct xfs_inode *ip = XFS_I(inode);
935 struct buffer_head *bh, *head; 935 struct buffer_head *bh, *head;
936 loff_t offset = page_offset(page); 936 loff_t offset = page_offset(page);
937 ssize_t len = 1 << inode->i_blkbits;
938 937
939 if (!xfs_is_delayed_page(page, IO_DELAY)) 938 if (!xfs_is_delayed_page(page, IO_DELAY))
940 goto out_invalidate; 939 goto out_invalidate;
@@ -949,58 +948,14 @@ xfs_aops_discard_page(
949 xfs_ilock(ip, XFS_ILOCK_EXCL); 948 xfs_ilock(ip, XFS_ILOCK_EXCL);
950 bh = head = page_buffers(page); 949 bh = head = page_buffers(page);
951 do { 950 do {
952 int done;
953 xfs_fileoff_t offset_fsb;
954 xfs_bmbt_irec_t imap;
955 int nimaps = 1;
956 int error; 951 int error;
957 xfs_fsblock_t firstblock; 952 xfs_fileoff_t start_fsb;
958 xfs_bmap_free_t flist;
959 953
960 if (!buffer_delay(bh)) 954 if (!buffer_delay(bh))
961 goto next_buffer; 955 goto next_buffer;
962 956
963 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 957 start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
964 958 error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
965 /*
966 * Map the range first and check that it is a delalloc extent
967 * before trying to unmap the range. Otherwise we will be
968 * trying to remove a real extent (which requires a
969 * transaction) or a hole, which is probably a bad idea...
970 */
971 error = xfs_bmapi(NULL, ip, offset_fsb, 1,
972 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
973 &nimaps, NULL);
974
975 if (error) {
976 /* something screwed, just bail */
977 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
978 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
979 "page discard failed delalloc mapping lookup.");
980 }
981 break;
982 }
983 if (!nimaps) {
984 /* nothing there */
985 goto next_buffer;
986 }
987 if (imap.br_startblock != DELAYSTARTBLOCK) {
988 /* been converted, ignore */
989 goto next_buffer;
990 }
991 WARN_ON(imap.br_blockcount == 0);
992
993 /*
994 * Note: while we initialise the firstblock/flist pair, they
995 * should never be used because blocks should never be
996 * allocated or freed for a delalloc extent and hence we need
997 * don't cancel or finish them after the xfs_bunmapi() call.
998 */
999 xfs_bmap_init(&flist, &firstblock);
1000 error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
1001 &flist, &done);
1002
1003 ASSERT(!flist.xbf_count && !flist.xbf_first);
1004 if (error) { 959 if (error) {
1005 /* something screwed, just bail */ 960 /* something screwed, just bail */
1006 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 961 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +965,7 @@ xfs_aops_discard_page(
1010 break; 965 break;
1011 } 966 }
1012next_buffer: 967next_buffer:
1013 offset += len; 968 offset += 1 << inode->i_blkbits;
1014 969
1015 } while ((bh = bh->b_this_page) != head); 970 } while ((bh = bh->b_this_page) != head);
1016 971
@@ -1111,11 +1066,12 @@ xfs_vm_writepage(
1111 uptodate = 0; 1066 uptodate = 0;
1112 1067
1113 /* 1068 /*
1114 * A hole may still be marked uptodate because discard_buffer 1069 * set_page_dirty dirties all buffers in a page, independent
1115 * leaves the flag set. 1070 * of their state. The dirty state however is entirely
1071 * meaningless for holes (!mapped && uptodate), so skip
1072 * buffers covering holes here.
1116 */ 1073 */
1117 if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 1074 if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
1118 ASSERT(!buffer_dirty(bh));
1119 imap_valid = 0; 1075 imap_valid = 0;
1120 continue; 1076 continue;
1121 } 1077 }
@@ -1504,11 +1460,42 @@ xfs_vm_write_failed(
1504 struct inode *inode = mapping->host; 1460 struct inode *inode = mapping->host;
1505 1461
1506 if (to > inode->i_size) { 1462 if (to > inode->i_size) {
1507 struct iattr ia = { 1463 /*
1508 .ia_valid = ATTR_SIZE | ATTR_FORCE, 1464 * punch out the delalloc blocks we have already allocated. We
1509 .ia_size = inode->i_size, 1465 * don't call xfs_setattr() to do this as we may be in the
1510 }; 1466 * middle of a multi-iovec write and so the vfs inode->i_size
1511 xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); 1467 * will not match the xfs ip->i_size and so it will zero too
1468 * much. Hence we jus truncate the page cache to zero what is
1469 * necessary and punch the delalloc blocks directly.
1470 */
1471 struct xfs_inode *ip = XFS_I(inode);
1472 xfs_fileoff_t start_fsb;
1473 xfs_fileoff_t end_fsb;
1474 int error;
1475
1476 truncate_pagecache(inode, to, inode->i_size);
1477
1478 /*
1479 * Check if there are any blocks that are outside of i_size
1480 * that need to be trimmed back.
1481 */
1482 start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
1483 end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
1484 if (end_fsb <= start_fsb)
1485 return;
1486
1487 xfs_ilock(ip, XFS_ILOCK_EXCL);
1488 error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
1489 end_fsb - start_fsb);
1490 if (error) {
1491 /* something screwed, just bail */
1492 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1493 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
1494 "xfs_vm_write_failed: unable to clean up ino %lld",
1495 ip->i_ino);
1496 }
1497 }
1498 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1512 } 1499 }
1513} 1500}
1514 1501
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 63fd2c07cb57..4c5deb6e9e31 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -488,29 +488,16 @@ found:
488 spin_unlock(&pag->pag_buf_lock); 488 spin_unlock(&pag->pag_buf_lock);
489 xfs_perag_put(pag); 489 xfs_perag_put(pag);
490 490
491 /* Attempt to get the semaphore without sleeping, 491 if (xfs_buf_cond_lock(bp)) {
492 * if this does not work then we need to drop the 492 /* failed, so wait for the lock if requested. */
493 * spinlock and do a hard attempt on the semaphore.
494 */
495 if (down_trylock(&bp->b_sema)) {
496 if (!(flags & XBF_TRYLOCK)) { 493 if (!(flags & XBF_TRYLOCK)) {
497 /* wait for buffer ownership */
498 xfs_buf_lock(bp); 494 xfs_buf_lock(bp);
499 XFS_STATS_INC(xb_get_locked_waited); 495 XFS_STATS_INC(xb_get_locked_waited);
500 } else { 496 } else {
501 /* We asked for a trylock and failed, no need
502 * to look at file offset and length here, we
503 * know that this buffer at least overlaps our
504 * buffer and is locked, therefore our buffer
505 * either does not exist, or is this buffer.
506 */
507 xfs_buf_rele(bp); 497 xfs_buf_rele(bp);
508 XFS_STATS_INC(xb_busy_locked); 498 XFS_STATS_INC(xb_busy_locked);
509 return NULL; 499 return NULL;
510 } 500 }
511 } else {
512 /* trylock worked */
513 XB_SET_OWNER(bp);
514 } 501 }
515 502
516 if (bp->b_flags & XBF_STALE) { 503 if (bp->b_flags & XBF_STALE) {
@@ -876,10 +863,18 @@ xfs_buf_rele(
876 */ 863 */
877 864
878/* 865/*
879 * Locks a buffer object, if it is not already locked. 866 * Locks a buffer object, if it is not already locked. Note that this in
880 * Note that this in no way locks the underlying pages, so it is only 867 * no way locks the underlying pages, so it is only useful for
881 * useful for synchronizing concurrent use of buffer objects, not for 868 * synchronizing concurrent use of buffer objects, not for synchronizing
882 * synchronizing independent access to the underlying pages. 869 * independent access to the underlying pages.
870 *
871 * If we come across a stale, pinned, locked buffer, we know that we are
872 * being asked to lock a buffer that has been reallocated. Because it is
873 * pinned, we know that the log has not been pushed to disk and hence it
874 * will still be locked. Rather than continuing to have trylock attempts
875 * fail until someone else pushes the log, push it ourselves before
876 * returning. This means that the xfsaild will not get stuck trying
877 * to push on stale inode buffers.
883 */ 878 */
884int 879int
885xfs_buf_cond_lock( 880xfs_buf_cond_lock(
@@ -890,6 +885,8 @@ xfs_buf_cond_lock(
890 locked = down_trylock(&bp->b_sema) == 0; 885 locked = down_trylock(&bp->b_sema) == 0;
891 if (locked) 886 if (locked)
892 XB_SET_OWNER(bp); 887 XB_SET_OWNER(bp);
888 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
889 xfs_log_force(bp->b_target->bt_mount, 0);
893 890
894 trace_xfs_buf_cond_lock(bp, _RET_IP_); 891 trace_xfs_buf_cond_lock(bp, _RET_IP_);
895 return locked ? 0 : -EBUSY; 892 return locked ? 0 : -EBUSY;
@@ -1781,7 +1778,6 @@ xfs_buf_delwri_split(
1781 INIT_LIST_HEAD(list); 1778 INIT_LIST_HEAD(list);
1782 spin_lock(dwlk); 1779 spin_lock(dwlk);
1783 list_for_each_entry_safe(bp, n, dwq, b_list) { 1780 list_for_each_entry_safe(bp, n, dwq, b_list) {
1784 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1785 ASSERT(bp->b_flags & XBF_DELWRI); 1781 ASSERT(bp->b_flags & XBF_DELWRI);
1786 1782
1787 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { 1783 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1795,6 +1791,7 @@ xfs_buf_delwri_split(
1795 _XBF_RUN_QUEUES); 1791 _XBF_RUN_QUEUES);
1796 bp->b_flags |= XBF_WRITE; 1792 bp->b_flags |= XBF_WRITE;
1797 list_move_tail(&bp->b_list, list); 1793 list_move_tail(&bp->b_list, list);
1794 trace_xfs_buf_delwri_split(bp, _RET_IP_);
1798 } else 1795 } else
1799 skipped++; 1796 skipped++;
1800 } 1797 }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6d38e..ad442d9e392e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
416 if (IS_ERR(dentry)) 416 if (IS_ERR(dentry))
417 return PTR_ERR(dentry); 417 return PTR_ERR(dentry);
418 418
419 kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); 419 kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
420 if (!kbuf) 420 if (!kbuf)
421 goto out_dput; 421 goto out_dput;
422 422
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 96107efc0c61..94d5fd6a2973 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -762,7 +762,8 @@ xfs_setup_inode(
762 inode->i_state = I_NEW; 762 inode->i_state = I_NEW;
763 763
764 inode_sb_list_add(inode); 764 inode_sb_list_add(inode);
765 insert_inode_hash(inode); 765 /* make the inode look hashed for the writeback code */
766 hlist_add_fake(&inode->i_hash);
766 767
767 inode->i_mode = ip->i_d.di_mode; 768 inode->i_mode = ip->i_d.di_mode;
768 inode->i_nlink = ip->i_d.di_nlink; 769 inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9f3a78fe6ae4..064f964d4f3c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
353 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
355 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 mp->m_flags |= XFS_MOUNT_DELAYLOG;
356 cmn_err(CE_WARN,
357 "Enabling EXPERIMENTAL delayed logging feature "
358 "- use at your own risk.\n");
359 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
360 mp->m_flags &= ~XFS_MOUNT_DELAYLOG; 357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
361 } else if (!strcmp(this_char, "ihashsize")) { 358 } else if (!strcmp(this_char, "ihashsize")) {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d33254981d..afb0d7cfad1c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -853,6 +853,7 @@ restart:
853 if (trylock) { 853 if (trylock) {
854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { 854 if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
855 skipped++; 855 skipped++;
856 xfs_perag_put(pag);
856 continue; 857 continue;
857 } 858 }
858 first_index = pag->pag_ici_reclaim_cursor; 859 first_index = pag->pag_ici_reclaim_cursor;
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 0135e2a669d7..11dd72070cbb 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
44#ifdef CONFIG_XFS_POSIX_ACL 44#ifdef CONFIG_XFS_POSIX_ACL
45extern int xfs_check_acl(struct inode *inode, int mask); 45extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); 47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode); 48extern int xfs_acl_chmod(struct inode *inode);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8abd12e32e13..4111cd3966c7 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
5471 if (error) 5471 if (error)
5472 goto out_unlock_iolock; 5472 goto out_unlock_iolock;
5473 } 5473 }
5474 5474 /*
5475 ASSERT(ip->i_delayed_blks == 0); 5475 * even after flushing the inode, there can still be delalloc
5476 * blocks on the inode beyond EOF due to speculative
5477 * preallocation. These are not removed until the release
5478 * function is called or the inode is inactivated. Hence we
5479 * cannot assert here that ip->i_delayed_blks == 0.
5480 */
5476 } 5481 }
5477 5482
5478 lock = xfs_ilock_map_shared(ip); 5483 lock = xfs_ilock_map_shared(ip);
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves(
6070 *count += xfs_bmbt_disk_get_blockcount(frp); 6075 *count += xfs_bmbt_disk_get_blockcount(frp);
6071 } 6076 }
6072} 6077}
6078
6079/*
 6080 * dead simple method of punching delayed allocation blocks from a range in
6081 * the inode. Walks a block at a time so will be slow, but is only executed in
 6082 * rare error cases so the overhead is not critical. This will always punch out
6083 * both the start and end blocks, even if the ranges only partially overlap
6084 * them, so it is up to the caller to ensure that partial blocks are not
6085 * passed in.
6086 */
6087int
6088xfs_bmap_punch_delalloc_range(
6089 struct xfs_inode *ip,
6090 xfs_fileoff_t start_fsb,
6091 xfs_fileoff_t length)
6092{
6093 xfs_fileoff_t remaining = length;
6094 int error = 0;
6095
6096 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
6097
6098 do {
6099 int done;
6100 xfs_bmbt_irec_t imap;
6101 int nimaps = 1;
6102 xfs_fsblock_t firstblock;
6103 xfs_bmap_free_t flist;
6104
6105 /*
6106 * Map the range first and check that it is a delalloc extent
6107 * before trying to unmap the range. Otherwise we will be
6108 * trying to remove a real extent (which requires a
6109 * transaction) or a hole, which is probably a bad idea...
6110 */
6111 error = xfs_bmapi(NULL, ip, start_fsb, 1,
6112 XFS_BMAPI_ENTIRE, NULL, 0, &imap,
6113 &nimaps, NULL);
6114
6115 if (error) {
6116 /* something screwed, just bail */
6117 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
6118 xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
6119 "Failed delalloc mapping lookup ino %lld fsb %lld.",
6120 ip->i_ino, start_fsb);
6121 }
6122 break;
6123 }
6124 if (!nimaps) {
6125 /* nothing there */
6126 goto next_block;
6127 }
6128 if (imap.br_startblock != DELAYSTARTBLOCK) {
6129 /* been converted, ignore */
6130 goto next_block;
6131 }
6132 WARN_ON(imap.br_blockcount == 0);
6133
6134 /*
6135 * Note: while we initialise the firstblock/flist pair, they
6136 * should never be used because blocks should never be
6137 * allocated or freed for a delalloc extent and hence we need
6138 * don't cancel or finish them after the xfs_bunmapi() call.
6139 */
6140 xfs_bmap_init(&flist, &firstblock);
6141 error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
6142 &flist, &done);
6143 if (error)
6144 break;
6145
6146 ASSERT(!flist.xbf_count && !flist.xbf_first);
6147next_block:
6148 start_fsb++;
6149 remaining--;
 6150 } while (remaining > 0);
6151
6152 return error;
6153}
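
Both callers of the new helper added earlier in this diff, xfs_aops_discard_page() and xfs_vm_write_failed(), drive it the same way. A condensed sketch of that calling convention (error handling trimmed; note that the helper punches whole filesystem blocks, so partially covered blocks at either end are discarded too):

/* Sketch against the XFS internals used in this diff; not a drop-in. */
static void punch_delalloc_sketch(struct xfs_inode *ip, xfs_off_t start,
                                  xfs_off_t end)
{
        xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(ip->i_mount, start);
        xfs_fileoff_t end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
        int error;

        if (end_fsb <= start_fsb)
                return;

        xfs_ilock(ip, XFS_ILOCK_EXCL);
        error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
                                              end_fsb - start_fsb);
        if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount))
                xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
                        "failed to punch delalloc range on ino %lld",
                        ip->i_ino);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
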
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 71ec9b6ecdfc..3651191daea1 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
394 int whichfork, 394 int whichfork,
395 int *count); 395 int *count);
396 396
397int
398xfs_bmap_punch_delalloc_range(
399 struct xfs_inode *ip,
400 xfs_fileoff_t start_fsb,
401 xfs_fileoff_t length);
397#endif /* __KERNEL__ */ 402#endif /* __KERNEL__ */
398 403
399#endif /* __XFS_BMAP_H__ */ 404#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c60a22..e60490bc00a6 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
377 ip->i_d.di_format = tip->i_d.di_format; 377 ip->i_d.di_format = tip->i_d.di_format;
378 tip->i_d.di_format = tmp; 378 tip->i_d.di_format = tmp;
379 379
380 /*
381 * The extents in the source inode could still contain speculative
382 * preallocation beyond EOF (e.g. the file is open but not modified
383 * while defrag is in progress). In that case, we need to copy over the
384 * number of delalloc blocks the data fork in the source inode is
385 * tracking beyond EOF so that when the fork is truncated away when the
386 * temporary inode is unlinked we don't underrun the i_delayed_blks
387 * counter on that inode.
388 */
389 ASSERT(tip->i_delayed_blks == 0);
390 tip->i_delayed_blks = ip->i_delayed_blks;
391 ip->i_delayed_blks = 0;
392
380 ilf_fields = XFS_ILOG_CORE; 393 ilf_fields = XFS_ILOG_CORE;
381 394
382 switch(ip->i_d.di_format) { 395 switch(ip->i_d.di_format) {
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed9990267661..c78cc6a3d87c 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,6 +58,7 @@ xfs_error_trap(int e)
58int xfs_etest[XFS_NUM_INJECT_ERROR]; 58int xfs_etest[XFS_NUM_INJECT_ERROR];
59int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; 59int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
60char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; 60char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
61int xfs_error_test_active;
61 62
62int 63int
63xfs_error_test(int error_tag, int *fsidp, char *expression, 64xfs_error_test(int error_tag, int *fsidp, char *expression,
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
108 len = strlen(mp->m_fsname); 109 len = strlen(mp->m_fsname);
109 xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); 110 xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
110 strcpy(xfs_etest_fsname[i], mp->m_fsname); 111 strcpy(xfs_etest_fsname[i], mp->m_fsname);
112 xfs_error_test_active++;
111 return 0; 113 return 0;
112 } 114 }
113 } 115 }
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
137 xfs_etest_fsid[i] = 0LL; 139 xfs_etest_fsid[i] = 0LL;
138 kmem_free(xfs_etest_fsname[i]); 140 kmem_free(xfs_etest_fsname[i]);
139 xfs_etest_fsname[i] = NULL; 141 xfs_etest_fsname[i] = NULL;
142 xfs_error_test_active--;
140 } 143 }
141 } 144 }
142 145
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c2c1a072bb82..f338847f80b8 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level,
127#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT 127#define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT
128 128
129#ifdef DEBUG 129#ifdef DEBUG
130extern int xfs_error_test_active;
130extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); 131extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
131 132
132#define XFS_NUM_INJECT_ERROR 10 133#define XFS_NUM_INJECT_ERROR 10
133#define XFS_TEST_ERROR(expr, mp, tag, rf) \ 134#define XFS_TEST_ERROR(expr, mp, tag, rf) \
134 ((expr) || \ 135 ((expr) || (xfs_error_test_active && \
135 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ 136 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
136 (rf))) 137 (rf))))
137 138
138extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); 139extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
139extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); 140extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
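
The xfs_error.h change puts a global xfs_error_test_active counter in front of the per-tag lookup, presumably so that DEBUG kernels with no error injection configured pay for one global load per XFS_TEST_ERROR() site instead of a function call. The pattern in isolation, with placeholder names:

/*
 * Cheap guard in front of an expensive test: the counter is non-zero
 * only while at least one injection tag is registered, so the table
 * walk is skipped entirely in the common case.
 */
static int error_test_active;           /* bumped on add, dropped on clear */

static int expensive_error_lookup(int tag)
{
        /* walk the table of registered tags, match the filesystem, ... */
        return 0;
}

#define TEST_ERROR(expr, tag) \
        ((expr) || (error_test_active && expensive_error_lookup(tag)))

/* Example expansion site: only evaluates the lookup when armed. */
static int sketch_io_error(int hw_failed)
{
        return TEST_ERROR(hw_failed, 1);
}
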
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dce5699..9124425b7f2f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
744 * If the file's parent directory is known, take its iolock in exclusive 744 * If the file's parent directory is known, take its iolock in exclusive
745 * mode to prevent two sibling files from racing each other to migrate 745 * mode to prevent two sibling files from racing each other to migrate
746 * themselves and their parent to different AGs. 746 * themselves and their parent to different AGs.
747 *
748 * Note that we lock the parent directory iolock inside the child
749 * iolock here. That's fine as we never hold both parent and child
750 * iolock in any other place. This is different from the ilock,
751 * which requires locking of the child after the parent for namespace
752 * operations.
747 */ 753 */
748 if (pip) 754 if (pip)
749 xfs_ilock(pip, XFS_IOLOCK_EXCL); 755 xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
750 756
751 /* 757 /*
752 * A new AG needs to be found for the file. If the file's parent 758 * A new AG needs to be found for the file. If the file's parent
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0cdd26932d8e..d7de5a3f7867 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,6 +91,17 @@ xfs_inode_alloc(
91 return ip; 91 return ip;
92} 92}
93 93
94STATIC void
95xfs_inode_free_callback(
96 struct rcu_head *head)
97{
98 struct inode *inode = container_of(head, struct inode, i_rcu);
99 struct xfs_inode *ip = XFS_I(inode);
100
101 INIT_LIST_HEAD(&inode->i_dentry);
102 kmem_zone_free(xfs_inode_zone, ip);
103}
104
94void 105void
95xfs_inode_free( 106xfs_inode_free(
96 struct xfs_inode *ip) 107 struct xfs_inode *ip)
@@ -134,7 +145,7 @@ xfs_inode_free(
134 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 145 ASSERT(!spin_is_locked(&ip->i_flags_lock));
135 ASSERT(completion_done(&ip->i_flush)); 146 ASSERT(completion_done(&ip->i_flush));
136 147
137 kmem_zone_free(xfs_inode_zone, ip); 148 call_rcu(&ip->i_vnode.i_rcu, xfs_inode_free_callback);
138} 149}
139 150
140/* 151/*
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c7ac020705df..7c8d30c453c3 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -657,18 +657,37 @@ xfs_inode_item_unlock(
657} 657}
658 658
659/* 659/*
660 * This is called to find out where the oldest active copy of the 660 * This is called to find out where the oldest active copy of the inode log
661 * inode log item in the on disk log resides now that the last log 661 * item in the on disk log resides now that the last log write of it completed
662 * write of it completed at the given lsn. Since we always re-log 662 * at the given lsn. Since we always re-log all dirty data in an inode, the
663 * all dirty data in an inode, the latest copy in the on disk log 663 * latest copy in the on disk log is the only one that matters. Therefore,
664 * is the only one that matters. Therefore, simply return the 664 * simply return the given lsn.
665 * given lsn. 665 *
666 * If the inode has been marked stale because the cluster is being freed, we
667 * don't want to (re-)insert this inode into the AIL. There is a race condition
668 * where the cluster buffer may be unpinned before the inode is inserted into
669 * the AIL during transaction committed processing. If the buffer is unpinned
670 * before the inode item has been committed and inserted, then it is possible
 671 * for the buffer to be written and IO completion to run before the inode is inserted
672 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
673 * AIL which will never get removed. It will, however, get reclaimed which
 674 * triggers an assert in xfs_inode_free() complaining about freeing an inode
675 * still in the AIL.
676 *
677 * To avoid this, return a lower LSN than the one passed in so that the
678 * transaction committed code will not move the inode forward in the AIL but
679 * will still unpin it properly.
666 */ 680 */
667STATIC xfs_lsn_t 681STATIC xfs_lsn_t
668xfs_inode_item_committed( 682xfs_inode_item_committed(
669 struct xfs_log_item *lip, 683 struct xfs_log_item *lip,
670 xfs_lsn_t lsn) 684 xfs_lsn_t lsn)
671{ 685{
686 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
687 struct xfs_inode *ip = iip->ili_inode;
688
689 if (xfs_iflags_test(ip, XFS_ISTALE))
690 return lsn - 1;
672 return lsn; 691 return lsn;
673} 692}
674 693
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab5a399..19e9dfa1c254 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
275 pag = radix_tree_delete(&mp->m_perag_tree, agno); 275 pag = radix_tree_delete(&mp->m_perag_tree, agno);
276 spin_unlock(&mp->m_perag_lock); 276 spin_unlock(&mp->m_perag_lock);
277 ASSERT(pag); 277 ASSERT(pag);
278 ASSERT(atomic_read(&pag->pag_ref) == 0);
278 call_rcu(&pag->rcu_head, __xfs_free_perag); 279 call_rcu(&pag->rcu_head, __xfs_free_perag);
279 } 280 }
280} 281}
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 45ce15dc5b2b..edfa178bafb6 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -408,7 +408,7 @@ xfs_mru_cache_flush(
408 spin_lock(&mru->lock); 408 spin_lock(&mru->lock);
409 if (mru->queued) { 409 if (mru->queued) {
410 spin_unlock(&mru->lock); 410 spin_unlock(&mru->lock);
411 cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); 411 cancel_delayed_work_sync(&mru->work);
412 spin_lock(&mru->lock); 412 spin_lock(&mru->lock);
413 } 413 }
414 414
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b113bd6..9bb6eda4cd21 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
346#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta) 346#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
347#define xfs_trans_apply_dquot_deltas(tp) 347#define xfs_trans_apply_dquot_deltas(tp)
348#define xfs_trans_unreserve_and_mod_dquots(tp) 348#define xfs_trans_unreserve_and_mod_dquots(tp)
349#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0) 349static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
350#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0) 350 struct xfs_inode *ip, long nblks, long ninos, uint flags)
351{
352 return 0;
353}
354static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
355 struct xfs_mount *mp, struct xfs_dquot *udqp,
356 struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
357{
358 return 0;
359}
351#define xfs_qm_vop_create_dqattach(tp, ip, u, g) 360#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
352#define xfs_qm_vop_rename_dqattach(it) (0) 361#define xfs_qm_vop_rename_dqattach(it) (0)
353#define xfs_qm_vop_chown(tp, ip, old, new) (NULL) 362#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
357#define xfs_qm_dqdetach(ip) 366#define xfs_qm_dqdetach(ip)
358#define xfs_qm_dqrele(d) 367#define xfs_qm_dqrele(d)
359#define xfs_qm_statvfs(ip, s) 368#define xfs_qm_statvfs(ip, s)
360#define xfs_qm_sync(mp, fl) (0) 369static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
370{
371 return 0;
372}
361#define xfs_qm_newmount(mp, a, b) (0) 373#define xfs_qm_newmount(mp, a, b) (0)
362#define xfs_qm_mount_quotas(mp) 374#define xfs_qm_mount_quotas(mp)
363#define xfs_qm_unmount(mp) 375#define xfs_qm_unmount(mp)
364#define xfs_qm_unmount_quotas(mp) (0) 376#define xfs_qm_unmount_quotas(mp)
365#endif /* CONFIG_XFS_QUOTA */ 377#endif /* CONFIG_XFS_QUOTA */
366 378
367#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ 379#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
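
Several of the !CONFIG_XFS_QUOTA stubs above change from object-like macros returning (0) to static inline functions. The usual motivation for this conversion (not spelled out in the patch) is that an inline stub type-checks its arguments and counts them as used, so callers build cleanly with identical code whether or not the feature is configured. In miniature, with made-up names:

/* Stand-in types for illustration only. */
struct foo_mount;
struct foo_inode;

#ifdef CONFIG_FOO_FEATURE
int foo_reserve_blocks(struct foo_mount *mp, struct foo_inode *ip,
                       long nblks, unsigned int flags);
#else
/* Inline stub: same prototype, no side effects, arguments type-checked. */
static inline int foo_reserve_blocks(struct foo_mount *mp,
                                     struct foo_inode *ip,
                                     long nblks, unsigned int flags)
{
        return 0;
}
#endif
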
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d2af0a8381a6..77a59891734e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -297,6 +297,7 @@ xfs_rename(
297 * it and some incremental backup programs won't work without it. 297 * it and some incremental backup programs won't work without it.
298 */ 298 */
299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); 299 xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
300 xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
300 301
301 /* 302 /*
302 * Adjust the link count on src_dp. This is necessary when 303 * Adjust the link count on src_dp. This is necessary when