aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/Locking8
-rw-r--r--Documentation/filesystems/porting27
-rw-r--r--Documentation/filesystems/vfs.txt30
-rw-r--r--arch/arm/mach-tegra/clock.c7
-rw-r--r--arch/arm/mach-ux500/clock.c31
-rw-r--r--arch/arm/plat-omap/clock.c12
-rw-r--r--arch/arm/plat-samsung/clock.c7
-rw-r--r--arch/arm/plat-spear/clock.c7
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c11
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c29
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h2
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c22
-rw-r--r--drivers/base/devtmpfs.c337
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/char/generic_nvram.c4
-rw-r--r--drivers/char/nvram.c2
-rw-r--r--drivers/char/ps3flash.c13
-rw-r--r--drivers/macintosh/nvram.c4
-rw-r--r--drivers/md/md.c26
-rw-r--r--drivers/mtd/ubi/cdev.c10
-rw-r--r--drivers/sh/clk/core.c7
-rw-r--r--drivers/staging/pohmelfs/dir.c2
-rw-r--r--drivers/staging/pohmelfs/inode.c11
-rw-r--r--drivers/usb/gadget/printer.c5
-rw-r--r--drivers/video/fb_defio.c11
-rw-r--r--fs/9p/acl.c4
-rw-r--r--fs/9p/acl.h2
-rw-r--r--fs/9p/v9fs_vfs.h3
-rw-r--r--fs/9p/vfs_file.c22
-rw-r--r--fs/9p/vfs_inode.c6
-rw-r--r--fs/9p/vfs_inode_dotl.c4
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/file.c8
-rw-r--r--fs/afs/afs_vl.h2
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/security.c6
-rw-r--r--fs/afs/write.c18
-rw-r--r--fs/attr.c6
-rw-r--r--fs/bad_inode.c5
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--fs/binfmt_elf_fdpic.c3
-rw-r--r--fs/binfmt_misc.c3
-rw-r--r--fs/block_dev.c17
-rw-r--r--fs/btrfs/acl.c5
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/file.c169
-rw-r--r--fs/btrfs/inode.c25
-rw-r--r--fs/btrfs/ioctl.c16
-rw-r--r--fs/cachefiles/bind.c2
-rw-r--r--fs/ceph/caps.c6
-rw-r--r--fs/ceph/dir.c21
-rw-r--r--fs/ceph/file.c22
-rw-r--r--fs/ceph/inode.c6
-rw-r--r--fs/ceph/super.h5
-rw-r--r--fs/cifs/cifsfs.c11
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/cifs/dir.c14
-rw-r--r--fs/cifs/file.c18
-rw-r--r--fs/cifs/readdir.c2
-rw-r--r--fs/coda/coda_int.h2
-rw-r--r--fs/coda/coda_linux.h2
-rw-r--r--fs/coda/dir.c9
-rw-r--r--fs/coda/file.c8
-rw-r--r--fs/coda/pioctl.c4
-rw-r--r--fs/dcache.c262
-rw-r--r--fs/direct-io.c88
-rw-r--r--fs/ecryptfs/file.c7
-rw-r--r--fs/ecryptfs/inode.c37
-rw-r--r--fs/efs/namei.c7
-rw-r--r--fs/exec.c14
-rw-r--r--fs/exofs/file.c10
-rw-r--r--fs/exofs/namei.c7
-rw-r--r--fs/ext2/acl.c4
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/file.c4
-rw-r--r--fs/ext2/inode.c6
-rw-r--r--fs/ext2/namei.c14
-rw-r--r--fs/ext3/acl.c4
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/fsync.c18
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext3/namei.c14
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext4/acl.c4
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/file.c21
-rw-r--r--fs/ext4/fsync.c38
-rw-r--r--fs/ext4/inode.c125
-rw-r--r--fs/ext4/namei.c14
-rw-r--r--fs/fat/fat.h4
-rw-r--r--fs/fat/file.c8
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/fat/namei_msdos.c29
-rw-r--r--fs/fat/namei_vfat.c6
-rw-r--r--fs/fs-writeback.c28
-rw-r--r--fs/fuse/dir.c28
-rw-r--r--fs/fuse/file.c45
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/generic_acl.c4
-rw-r--r--fs/gfs2/acl.c4
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c19
-rw-r--r--fs/gfs2/inode.c35
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/hfs/inode.c15
-rw-r--r--fs/hfsplus/hfsplus_fs.h3
-rw-r--r--fs/hfsplus/inode.c16
-rw-r--r--fs/hostfs/hostfs_kern.c21
-rw-r--r--fs/hpfs/dir.c4
-rw-r--r--fs/hpfs/file.c7
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hppfs/hppfs.c5
-rw-r--r--fs/inode.c129
-rw-r--r--fs/internal.h6
-rw-r--r--fs/isofs/dir.c3
-rw-r--r--fs/isofs/inode.c1
-rw-r--r--fs/isofs/isofs.h1
-rw-r--r--fs/isofs/namei.c13
-rw-r--r--fs/isofs/rock.c3
-rw-r--r--fs/jffs2/acl.c4
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/file.c9
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/acl.c4
-rw-r--r--fs/jfs/file.c11
-rw-r--r--fs/jfs/inode.c4
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/namei.c49
-rw-r--r--fs/libfs.c26
-rw-r--r--fs/logfs/dir.c4
-rw-r--r--fs/logfs/file.c11
-rw-r--r--fs/logfs/logfs.h2
-rw-r--r--fs/minix/inode.c3
-rw-r--r--fs/namei.c462
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/ncpfs/file.c4
-rw-r--r--fs/nfs/cache_lib.c9
-rw-r--r--fs/nfs/dir.c87
-rw-r--r--fs/nfs/direct.c4
-rw-r--r--fs/nfs/file.c18
-rw-r--r--fs/nfs/inode.c20
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4proc.c70
-rw-r--r--fs/nfs/nfs4state.c12
-rw-r--r--fs/nfs/pagelist.c4
-rw-r--r--fs/nfs/read.c8
-rw-r--r--fs/nfs/super.c16
-rw-r--r--fs/nfs/write.c22
-rw-r--r--fs/nfsd/nfs4recover.c52
-rw-r--r--fs/nilfs2/file.c12
-rw-r--r--fs/nilfs2/inode.c10
-rw-r--r--fs/nilfs2/namei.c7
-rw-r--r--fs/nilfs2/nilfs.h4
-rw-r--r--fs/ntfs/dir.c10
-rw-r--r--fs/ntfs/file.c13
-rw-r--r--fs/ntfs/inode.c10
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/acl.h2
-rw-r--r--fs/ocfs2/aops.c10
-rw-r--r--fs/ocfs2/file.c41
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/namei.c1
-rw-r--r--fs/ocfs2/refcounttree.c49
-rw-r--r--fs/open.c2
-rw-r--r--fs/proc/base.c6
-rw-r--r--fs/proc/proc_sysctl.c4
-rw-r--r--fs/read_write.c44
-rw-r--r--fs/reiserfs/dir.c13
-rw-r--r--fs/reiserfs/file.c10
-rw-r--r--fs/reiserfs/inode.c8
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/reiserfs/xattr.c25
-rw-r--r--fs/squashfs/namei.c10
-rw-r--r--fs/super.c176
-rw-r--r--fs/sync.c25
-rw-r--r--fs/sysfs/inode.c6
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/ubifs/file.c21
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c17
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c27
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c71
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h5
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--include/linux/anon_inodes.h2
-rw-r--r--include/linux/atomic.h26
-rw-r--r--include/linux/binfmts.h1
-rw-r--r--include/linux/dcache.h8
-rw-r--r--include/linux/ext3_fs.h2
-rw-r--r--include/linux/fb.h3
-rw-r--r--include/linux/fs.h100
-rw-r--r--include/linux/generic_acl.h2
-rw-r--r--include/linux/mm.h39
-rw-r--r--include/linux/mnt_namespace.h1
-rw-r--r--include/linux/namei.h5
-rw-r--r--include/linux/nfs_fs.h6
-rw-r--r--include/linux/nsproxy.h1
-rw-r--r--include/linux/reiserfs_xattr.h4
-rw-r--r--include/linux/rwsem.h10
-rw-r--r--include/linux/security.h9
-rw-r--r--include/linux/seq_file.h1
-rw-r--r--include/linux/shrinker.h42
-rw-r--r--include/trace/events/vmscan.h77
-rw-r--r--ipc/shm.c4
-rw-r--r--kernel/cgroup.c3
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/nsproxy.c4
-rw-r--r--kernel/rwsem.c16
-rw-r--r--mm/filemap.c3
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/rmap.c1
-rw-r--r--mm/swapfile.c29
-rw-r--r--mm/truncate.c3
-rw-r--r--mm/vmscan.c71
-rw-r--r--net/sunrpc/clnt.c11
-rw-r--r--net/unix/af_unix.c38
-rw-r--r--security/capability.c2
-rw-r--r--security/security.c9
-rw-r--r--security/selinux/avc.c2
-rw-r--r--security/selinux/hooks.c3
-rw-r--r--security/smack/smack_lsm.c5
-rw-r--r--security/tomoyo/realpath.c2
235 files changed, 2517 insertions, 1948 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 57d827d6071d..ca7e25292542 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -52,7 +52,7 @@ ata *);
52 void (*put_link) (struct dentry *, struct nameidata *, void *); 52 void (*put_link) (struct dentry *, struct nameidata *, void *);
53 void (*truncate) (struct inode *); 53 void (*truncate) (struct inode *);
54 int (*permission) (struct inode *, int, unsigned int); 54 int (*permission) (struct inode *, int, unsigned int);
55 int (*check_acl)(struct inode *, int, unsigned int); 55 int (*check_acl)(struct inode *, int);
56 int (*setattr) (struct dentry *, struct iattr *); 56 int (*setattr) (struct dentry *, struct iattr *);
57 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); 57 int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
58 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 58 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -412,7 +412,7 @@ prototypes:
412 int (*open) (struct inode *, struct file *); 412 int (*open) (struct inode *, struct file *);
413 int (*flush) (struct file *); 413 int (*flush) (struct file *);
414 int (*release) (struct inode *, struct file *); 414 int (*release) (struct inode *, struct file *);
415 int (*fsync) (struct file *, int datasync); 415 int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
416 int (*aio_fsync) (struct kiocb *, int datasync); 416 int (*aio_fsync) (struct kiocb *, int datasync);
417 int (*fasync) (int, struct file *, int); 417 int (*fasync) (int, struct file *, int);
418 int (*lock) (struct file *, int, struct file_lock *); 418 int (*lock) (struct file *, int, struct file_lock *);
@@ -438,9 +438,7 @@ prototypes:
438 438
439locking rules: 439locking rules:
440 All may block except for ->setlease. 440 All may block except for ->setlease.
441 No VFS locks held on entry except for ->fsync and ->setlease. 441 No VFS locks held on entry except for ->setlease.
442
443->fsync() has i_mutex on inode.
444 442
445->setlease has the file_list_lock held and must not sleep. 443->setlease has the file_list_lock held and must not sleep.
446 444
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 6e29954851a2..7f8861d341ea 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -400,10 +400,31 @@ a file off.
400 400
401-- 401--
402[mandatory] 402[mandatory]
403
404--
405[mandatory]
406 ->get_sb() is gone. Switch to use of ->mount(). Typically it's just 403 ->get_sb() is gone. Switch to use of ->mount(). Typically it's just
407a matter of switching from calling get_sb_... to mount_... and changing the 404a matter of switching from calling get_sb_... to mount_... and changing the
408function type. If you were doing it manually, just switch from setting ->mnt_root 405function type. If you were doing it manually, just switch from setting ->mnt_root
409to some pointer to returning that pointer. On errors return ERR_PTR(...). 406to some pointer to returning that pointer. On errors return ERR_PTR(...).
407
408--
409[mandatory]
410 ->permission(), generic_permission() and ->check_acl() have lost flags
411argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask.
412 generic_permission() has also lost the check_acl argument; if you want
413non-NULL to be used for that inode, put it into ->i_op->check_acl.
414
415--
416[mandatory]
417 If you implement your own ->llseek() you must handle SEEK_HOLE and
418SEEK_DATA. You can handle this by returning -EINVAL, but it would be nicer to
419support it in some way. The generic handler assumes that the entire file is
420data and there is a virtual hole at the end of the file. So if the provided
421offset is less than i_size and SEEK_DATA is specified, return the same offset.
422If the above is true for the offset and you are given SEEK_HOLE, return the end
423of the file. If the offset is i_size or greater return -ENXIO in either case.
424
425[mandatory]
426 If you have your own ->fsync() you must make sure to call
427filemap_write_and_wait_range() so that all dirty pages are synced out properly.
428You must also keep in mind that ->fsync() is not called with i_mutex held
429anymore, so if you require i_mutex locking you must make sure to take it and
430release it yourself.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 88b9f5519af9..eff6617c9a0f 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -229,6 +229,8 @@ struct super_operations {
229 229
230 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); 230 ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
231 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 231 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
232 int (*nr_cached_objects)(struct super_block *);
233 void (*free_cached_objects)(struct super_block *, int);
232}; 234};
233 235
234All methods are called without any locks being held, unless otherwise 236All methods are called without any locks being held, unless otherwise
@@ -301,6 +303,26 @@ or bottom half).
301 303
302 quota_write: called by the VFS to write to filesystem quota file. 304 quota_write: called by the VFS to write to filesystem quota file.
303 305
306 nr_cached_objects: called by the sb cache shrinking function for the
307 filesystem to return the number of freeable cached objects it contains.
308 Optional.
309
310 free_cached_objects: called by the sb cache shrinking function for the
311 filesystem to scan the number of objects indicated to try to free them.
312 Optional, but any filesystem implementing this method needs to also
313 implement ->nr_cached_objects for it to be called correctly.
314
315 We can't do anything with any errors that the filesystem might
316 encounter, hence the void return type. This will never be called if
317 the VM is trying to reclaim under GFP_NOFS conditions, hence this
318 method does not need to handle that situation itself.
319
320 Implementations must include conditional reschedule calls inside any
321 scanning loop that is done. This allows the VFS to determine
322 appropriate scan batch sizes without having to worry about whether
323 implementations will cause holdoff problems due to large scan batch
324 sizes.
325
304Whoever sets up the inode is responsible for filling in the "i_op" field. This 326Whoever sets up the inode is responsible for filling in the "i_op" field. This
305is a pointer to a "struct inode_operations" which describes the methods that 327is a pointer to a "struct inode_operations" which describes the methods that
306can be performed on individual inodes. 328can be performed on individual inodes.
@@ -333,8 +355,8 @@ struct inode_operations {
333 void * (*follow_link) (struct dentry *, struct nameidata *); 355 void * (*follow_link) (struct dentry *, struct nameidata *);
334 void (*put_link) (struct dentry *, struct nameidata *, void *); 356 void (*put_link) (struct dentry *, struct nameidata *, void *);
335 void (*truncate) (struct inode *); 357 void (*truncate) (struct inode *);
336 int (*permission) (struct inode *, int, unsigned int); 358 int (*permission) (struct inode *, int);
337 int (*check_acl)(struct inode *, int, unsigned int); 359 int (*check_acl)(struct inode *, int);
338 int (*setattr) (struct dentry *, struct iattr *); 360 int (*setattr) (struct dentry *, struct iattr *);
339 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); 361 int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
340 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); 362 int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
@@ -423,7 +445,7 @@ otherwise noted.
423 permission: called by the VFS to check for access rights on a POSIX-like 445 permission: called by the VFS to check for access rights on a POSIX-like
424 filesystem. 446 filesystem.
425 447
426 May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk 448 May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
427 mode, the filesystem must check the permission without blocking or 449 mode, the filesystem must check the permission without blocking or
428 storing to the inode. 450 storing to the inode.
429 451
@@ -755,7 +777,7 @@ struct file_operations {
755 int (*open) (struct inode *, struct file *); 777 int (*open) (struct inode *, struct file *);
756 int (*flush) (struct file *); 778 int (*flush) (struct file *);
757 int (*release) (struct inode *, struct file *); 779 int (*release) (struct inode *, struct file *);
758 int (*fsync) (struct file *, int datasync); 780 int (*fsync) (struct file *, loff_t, loff_t, int datasync);
759 int (*aio_fsync) (struct kiocb *, int datasync); 781 int (*aio_fsync) (struct kiocb *, int datasync);
760 int (*fasync) (int, struct file *, int); 782 int (*fasync) (int, struct file *, int);
761 int (*lock) (struct file *, int, struct file_lock *); 783 int (*lock) (struct file *, int, struct file_lock *);
diff --git a/arch/arm/mach-tegra/clock.c b/arch/arm/mach-tegra/clock.c
index e028320ab423..f8d41ffc0ca9 100644
--- a/arch/arm/mach-tegra/clock.c
+++ b/arch/arm/mach-tegra/clock.c
@@ -585,7 +585,7 @@ static const struct file_operations possible_parents_fops = {
585 585
586static int clk_debugfs_register_one(struct clk *c) 586static int clk_debugfs_register_one(struct clk *c)
587{ 587{
588 struct dentry *d, *child, *child_tmp; 588 struct dentry *d;
589 589
590 d = debugfs_create_dir(c->name, clk_debugfs_root); 590 d = debugfs_create_dir(c->name, clk_debugfs_root);
591 if (!d) 591 if (!d)
@@ -614,10 +614,7 @@ static int clk_debugfs_register_one(struct clk *c)
614 return 0; 614 return 0;
615 615
616err_out: 616err_out:
617 d = c->dent; 617 debugfs_remove_recursive(c->dent);
618 list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
619 debugfs_remove(child);
620 debugfs_remove(c->dent);
621 return -ENOMEM; 618 return -ENOMEM;
622} 619}
623 620
diff --git a/arch/arm/mach-ux500/clock.c b/arch/arm/mach-ux500/clock.c
index 32ce90840ee1..7d107be63eb4 100644
--- a/arch/arm/mach-ux500/clock.c
+++ b/arch/arm/mach-ux500/clock.c
@@ -635,16 +635,13 @@ static const struct file_operations set_rate_fops = {
635static struct dentry *clk_debugfs_register_dir(struct clk *c, 635static struct dentry *clk_debugfs_register_dir(struct clk *c,
636 struct dentry *p_dentry) 636 struct dentry *p_dentry)
637{ 637{
638 struct dentry *d, *clk_d, *child, *child_tmp; 638 struct dentry *d, *clk_d;
639 char s[255]; 639 const char *p = c->name;
640 char *p = s;
641 640
642 if (c->name == NULL) 641 if (!p)
643 p += sprintf(p, "BUG"); 642 p = "BUG";
644 else
645 p += sprintf(p, "%s", c->name);
646 643
647 clk_d = debugfs_create_dir(s, p_dentry); 644 clk_d = debugfs_create_dir(p, p_dentry);
648 if (!clk_d) 645 if (!clk_d)
649 return NULL; 646 return NULL;
650 647
@@ -666,24 +663,10 @@ static struct dentry *clk_debugfs_register_dir(struct clk *c,
666 return clk_d; 663 return clk_d;
667 664
668err_out: 665err_out:
669 d = clk_d; 666 debugfs_remove_recursive(clk_d);
670 list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
671 debugfs_remove(child);
672 debugfs_remove(clk_d);
673 return NULL; 667 return NULL;
674} 668}
675 669
676static void clk_debugfs_remove_dir(struct dentry *cdentry)
677{
678 struct dentry *d, *child, *child_tmp;
679
680 d = cdentry;
681 list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
682 debugfs_remove(child);
683 debugfs_remove(cdentry);
684 return ;
685}
686
687static int clk_debugfs_register_one(struct clk *c) 670static int clk_debugfs_register_one(struct clk *c)
688{ 671{
689 struct clk *pa = c->parent_periph; 672 struct clk *pa = c->parent_periph;
@@ -700,7 +683,7 @@ static int clk_debugfs_register_one(struct clk *c)
700 c->dent_bus = clk_debugfs_register_dir(c, 683 c->dent_bus = clk_debugfs_register_dir(c,
701 bpa->dent_bus ? bpa->dent_bus : bpa->dent); 684 bpa->dent_bus ? bpa->dent_bus : bpa->dent);
702 if ((!c->dent_bus) && (c->dent)) { 685 if ((!c->dent_bus) && (c->dent)) {
703 clk_debugfs_remove_dir(c->dent); 686 debugfs_remove_recursive(c->dent);
704 c->dent = NULL; 687 c->dent = NULL;
705 return -ENOMEM; 688 return -ENOMEM;
706 } 689 }
diff --git a/arch/arm/plat-omap/clock.c b/arch/arm/plat-omap/clock.c
index c9122dd6ee8d..964704f40bbe 100644
--- a/arch/arm/plat-omap/clock.c
+++ b/arch/arm/plat-omap/clock.c
@@ -480,13 +480,10 @@ static struct dentry *clk_debugfs_root;
480static int clk_debugfs_register_one(struct clk *c) 480static int clk_debugfs_register_one(struct clk *c)
481{ 481{
482 int err; 482 int err;
483 struct dentry *d, *child, *child_tmp; 483 struct dentry *d;
484 struct clk *pa = c->parent; 484 struct clk *pa = c->parent;
485 char s[255];
486 char *p = s;
487 485
488 p += sprintf(p, "%s", c->name); 486 d = debugfs_create_dir(c->name, pa ? pa->dent : clk_debugfs_root);
489 d = debugfs_create_dir(s, pa ? pa->dent : clk_debugfs_root);
490 if (!d) 487 if (!d)
491 return -ENOMEM; 488 return -ENOMEM;
492 c->dent = d; 489 c->dent = d;
@@ -509,10 +506,7 @@ static int clk_debugfs_register_one(struct clk *c)
509 return 0; 506 return 0;
510 507
511err_out: 508err_out:
512 d = c->dent; 509 debugfs_remove_recursive(c->dent);
513 list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
514 debugfs_remove(child);
515 debugfs_remove(c->dent);
516 return err; 510 return err;
517} 511}
518 512
diff --git a/arch/arm/plat-samsung/clock.c b/arch/arm/plat-samsung/clock.c
index 772892826ffc..0c9f95d98561 100644
--- a/arch/arm/plat-samsung/clock.c
+++ b/arch/arm/plat-samsung/clock.c
@@ -458,7 +458,7 @@ static struct dentry *clk_debugfs_root;
458static int clk_debugfs_register_one(struct clk *c) 458static int clk_debugfs_register_one(struct clk *c)
459{ 459{
460 int err; 460 int err;
461 struct dentry *d, *child, *child_tmp; 461 struct dentry *d;
462 struct clk *pa = c->parent; 462 struct clk *pa = c->parent;
463 char s[255]; 463 char s[255];
464 char *p = s; 464 char *p = s;
@@ -488,10 +488,7 @@ static int clk_debugfs_register_one(struct clk *c)
488 return 0; 488 return 0;
489 489
490err_out: 490err_out:
491 d = c->dent; 491 debugfs_remove_recursive(c->dent);
492 list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
493 debugfs_remove(child);
494 debugfs_remove(c->dent);
495 return err; 492 return err;
496} 493}
497 494
diff --git a/arch/arm/plat-spear/clock.c b/arch/arm/plat-spear/clock.c
index 6fa474cb398e..67dd00381ea6 100644
--- a/arch/arm/plat-spear/clock.c
+++ b/arch/arm/plat-spear/clock.c
@@ -916,7 +916,7 @@ static struct dentry *clk_debugfs_root;
916static int clk_debugfs_register_one(struct clk *c) 916static int clk_debugfs_register_one(struct clk *c)
917{ 917{
918 int err; 918 int err;
919 struct dentry *d, *child; 919 struct dentry *d;
920 struct clk *pa = c->pclk; 920 struct clk *pa = c->pclk;
921 char s[255]; 921 char s[255];
922 char *p = s; 922 char *p = s;
@@ -951,10 +951,7 @@ static int clk_debugfs_register_one(struct clk *c)
951 return 0; 951 return 0;
952 952
953err_out: 953err_out:
954 d = c->dent; 954 debugfs_remove_recursive(c->dent);
955 list_for_each_entry(child, &d->d_subdirs, d_u.d_child)
956 debugfs_remove(child);
957 debugfs_remove(c->dent);
958 return err; 955 return err;
959} 956}
960 957
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 3c7c3f82d842..fb59c46e9e9e 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1850,9 +1850,16 @@ out:
1850 return ret; 1850 return ret;
1851} 1851}
1852 1852
1853static int spufs_mfc_fsync(struct file *file, int datasync) 1853static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1854{ 1854{
1855 return spufs_mfc_flush(file, NULL); 1855 struct inode *inode = file->f_path.dentry->d_inode;
1856 int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
1857 if (!err) {
1858 mutex_lock(&inode->i_mutex);
1859 err = spufs_mfc_flush(file, NULL);
1860 mutex_unlock(&inode->i_mutex);
1861 }
1862 return err;
1856} 1863}
1857 1864
1858static int spufs_mfc_fasync(int fd, struct file *file, int on) 1865static int spufs_mfc_fasync(int fd, struct file *file, int on)
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 856e9c398068..e481f6b9a789 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -611,15 +611,14 @@ out:
611 611
612static struct file_system_type spufs_type; 612static struct file_system_type spufs_type;
613 613
614long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode, 614long spufs_create(struct path *path, struct dentry *dentry,
615 struct file *filp) 615 unsigned int flags, mode_t mode, struct file *filp)
616{ 616{
617 struct dentry *dentry;
618 int ret; 617 int ret;
619 618
620 ret = -EINVAL; 619 ret = -EINVAL;
621 /* check if we are on spufs */ 620 /* check if we are on spufs */
622 if (nd->path.dentry->d_sb->s_type != &spufs_type) 621 if (path->dentry->d_sb->s_type != &spufs_type)
623 goto out; 622 goto out;
624 623
625 /* don't accept undefined flags */ 624 /* don't accept undefined flags */
@@ -627,33 +626,27 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
627 goto out; 626 goto out;
628 627
629 /* only threads can be underneath a gang */ 628 /* only threads can be underneath a gang */
630 if (nd->path.dentry != nd->path.dentry->d_sb->s_root) { 629 if (path->dentry != path->dentry->d_sb->s_root) {
631 if ((flags & SPU_CREATE_GANG) || 630 if ((flags & SPU_CREATE_GANG) ||
632 !SPUFS_I(nd->path.dentry->d_inode)->i_gang) 631 !SPUFS_I(path->dentry->d_inode)->i_gang)
633 goto out; 632 goto out;
634 } 633 }
635 634
636 dentry = lookup_create(nd, 1);
637 ret = PTR_ERR(dentry);
638 if (IS_ERR(dentry))
639 goto out_dir;
640
641 mode &= ~current_umask(); 635 mode &= ~current_umask();
642 636
643 if (flags & SPU_CREATE_GANG) 637 if (flags & SPU_CREATE_GANG)
644 ret = spufs_create_gang(nd->path.dentry->d_inode, 638 ret = spufs_create_gang(path->dentry->d_inode,
645 dentry, nd->path.mnt, mode); 639 dentry, path->mnt, mode);
646 else 640 else
647 ret = spufs_create_context(nd->path.dentry->d_inode, 641 ret = spufs_create_context(path->dentry->d_inode,
648 dentry, nd->path.mnt, flags, mode, 642 dentry, path->mnt, flags, mode,
649 filp); 643 filp);
650 if (ret >= 0) 644 if (ret >= 0)
651 fsnotify_mkdir(nd->path.dentry->d_inode, dentry); 645 fsnotify_mkdir(path->dentry->d_inode, dentry);
652 return ret; 646 return ret;
653 647
654out_dir:
655 mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
656out: 648out:
649 mutex_unlock(&path->dentry->d_inode->i_mutex);
657 return ret; 650 return ret;
658} 651}
659 652
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index c448bac65518..099245f230b2 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -248,7 +248,7 @@ extern const struct spufs_tree_descr spufs_dir_debug_contents[];
248/* system call implementation */ 248/* system call implementation */
249extern struct spufs_calls spufs_calls; 249extern struct spufs_calls spufs_calls;
250long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status); 250long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
251long spufs_create(struct nameidata *nd, unsigned int flags, 251long spufs_create(struct path *nd, struct dentry *dentry, unsigned int flags,
252 mode_t mode, struct file *filp); 252 mode_t mode, struct file *filp);
253/* ELF coredump callbacks for writing SPU ELF notes */ 253/* ELF coredump callbacks for writing SPU ELF notes */
254extern int spufs_coredump_extra_notes_size(void); 254extern int spufs_coredump_extra_notes_size(void);
diff --git a/arch/powerpc/platforms/cell/spufs/syscalls.c b/arch/powerpc/platforms/cell/spufs/syscalls.c
index a3d2ce54ea2e..609e016e92d0 100644
--- a/arch/powerpc/platforms/cell/spufs/syscalls.c
+++ b/arch/powerpc/platforms/cell/spufs/syscalls.c
@@ -62,21 +62,17 @@ out:
62static long do_spu_create(const char __user *pathname, unsigned int flags, 62static long do_spu_create(const char __user *pathname, unsigned int flags,
63 mode_t mode, struct file *neighbor) 63 mode_t mode, struct file *neighbor)
64{ 64{
65 char *tmp; 65 struct path path;
66 struct dentry *dentry;
66 int ret; 67 int ret;
67 68
68 tmp = getname(pathname); 69 dentry = user_path_create(AT_FDCWD, pathname, &path, 1);
69 ret = PTR_ERR(tmp); 70 ret = PTR_ERR(dentry);
70 if (!IS_ERR(tmp)) { 71 if (!IS_ERR(dentry)) {
71 struct nameidata nd; 72 ret = spufs_create(&path, dentry, flags, mode, neighbor);
72 73 mutex_unlock(&path.dentry->d_inode->i_mutex);
73 ret = kern_path_parent(tmp, &nd); 74 dput(dentry);
74 if (!ret) { 75 path_put(&path);
75 nd.flags |= LOOKUP_OPEN | LOOKUP_CREATE;
76 ret = spufs_create(&nd, flags, mode, neighbor);
77 path_put(&nd.path);
78 }
79 putname(tmp);
80 } 76 }
81 77
82 return ret; 78 return ret;
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 82bbb5967aa9..6d678c99512e 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -21,12 +21,11 @@
21#include <linux/fs.h> 21#include <linux/fs.h>
22#include <linux/shmem_fs.h> 22#include <linux/shmem_fs.h>
23#include <linux/ramfs.h> 23#include <linux/ramfs.h>
24#include <linux/cred.h>
25#include <linux/sched.h> 24#include <linux/sched.h>
26#include <linux/init_task.h>
27#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/kthread.h>
28 27
29static struct vfsmount *dev_mnt; 28static struct task_struct *thread;
30 29
31#if defined CONFIG_DEVTMPFS_MOUNT 30#if defined CONFIG_DEVTMPFS_MOUNT
32static int mount_dev = 1; 31static int mount_dev = 1;
@@ -34,7 +33,16 @@ static int mount_dev = 1;
34static int mount_dev; 33static int mount_dev;
35#endif 34#endif
36 35
37static DEFINE_MUTEX(dirlock); 36static DEFINE_SPINLOCK(req_lock);
37
38static struct req {
39 struct req *next;
40 struct completion done;
41 int err;
42 const char *name;
43 mode_t mode; /* 0 => delete */
44 struct device *dev;
45} *requests;
38 46
39static int __init mount_param(char *str) 47static int __init mount_param(char *str)
40{ 48{
@@ -68,131 +76,152 @@ static inline int is_blockdev(struct device *dev)
68static inline int is_blockdev(struct device *dev) { return 0; } 76static inline int is_blockdev(struct device *dev) { return 0; }
69#endif 77#endif
70 78
79int devtmpfs_create_node(struct device *dev)
80{
81 const char *tmp = NULL;
82 struct req req;
83
84 if (!thread)
85 return 0;
86
87 req.mode = 0;
88 req.name = device_get_devnode(dev, &req.mode, &tmp);
89 if (!req.name)
90 return -ENOMEM;
91
92 if (req.mode == 0)
93 req.mode = 0600;
94 if (is_blockdev(dev))
95 req.mode |= S_IFBLK;
96 else
97 req.mode |= S_IFCHR;
98
99 req.dev = dev;
100
101 init_completion(&req.done);
102
103 spin_lock(&req_lock);
104 req.next = requests;
105 requests = &req;
106 spin_unlock(&req_lock);
107
108 wake_up_process(thread);
109 wait_for_completion(&req.done);
110
111 kfree(tmp);
112
113 return req.err;
114}
115
116int devtmpfs_delete_node(struct device *dev)
117{
118 const char *tmp = NULL;
119 struct req req;
120
121 if (!thread)
122 return 0;
123
124 req.name = device_get_devnode(dev, NULL, &tmp);
125 if (!req.name)
126 return -ENOMEM;
127
128 req.mode = 0;
129 req.dev = dev;
130
131 init_completion(&req.done);
132
133 spin_lock(&req_lock);
134 req.next = requests;
135 requests = &req;
136 spin_unlock(&req_lock);
137
138 wake_up_process(thread);
139 wait_for_completion(&req.done);
140
141 kfree(tmp);
142 return req.err;
143}
144
71static int dev_mkdir(const char *name, mode_t mode) 145static int dev_mkdir(const char *name, mode_t mode)
72{ 146{
73 struct nameidata nd;
74 struct dentry *dentry; 147 struct dentry *dentry;
148 struct path path;
75 int err; 149 int err;
76 150
77 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 151 dentry = kern_path_create(AT_FDCWD, name, &path, 1);
78 name, LOOKUP_PARENT, &nd); 152 if (IS_ERR(dentry))
79 if (err) 153 return PTR_ERR(dentry);
80 return err; 154
81 155 err = vfs_mkdir(path.dentry->d_inode, dentry, mode);
82 dentry = lookup_create(&nd, 1); 156 if (!err)
83 if (!IS_ERR(dentry)) { 157 /* mark as kernel-created inode */
84 err = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 158 dentry->d_inode->i_private = &thread;
85 if (!err) 159 dput(dentry);
86 /* mark as kernel-created inode */ 160 mutex_unlock(&path.dentry->d_inode->i_mutex);
87 dentry->d_inode->i_private = &dev_mnt; 161 path_put(&path);
88 dput(dentry);
89 } else {
90 err = PTR_ERR(dentry);
91 }
92
93 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
94 path_put(&nd.path);
95 return err; 162 return err;
96} 163}
97 164
98static int create_path(const char *nodepath) 165static int create_path(const char *nodepath)
99{ 166{
167 char *path;
168 char *s;
100 int err; 169 int err;
101 170
102 mutex_lock(&dirlock); 171 /* parent directories do not exist, create them */
103 err = dev_mkdir(nodepath, 0755); 172 path = kstrdup(nodepath, GFP_KERNEL);
104 if (err == -ENOENT) { 173 if (!path)
105 char *path; 174 return -ENOMEM;
106 char *s; 175
107 176 s = path;
108 /* parent directories do not exist, create them */ 177 for (;;) {
109 path = kstrdup(nodepath, GFP_KERNEL); 178 s = strchr(s, '/');
110 if (!path) { 179 if (!s)
111 err = -ENOMEM; 180 break;
112 goto out; 181 s[0] = '\0';
113 } 182 err = dev_mkdir(path, 0755);
114 s = path; 183 if (err && err != -EEXIST)
115 for (;;) { 184 break;
116 s = strchr(s, '/'); 185 s[0] = '/';
117 if (!s) 186 s++;
118 break;
119 s[0] = '\0';
120 err = dev_mkdir(path, 0755);
121 if (err && err != -EEXIST)
122 break;
123 s[0] = '/';
124 s++;
125 }
126 kfree(path);
127 } 187 }
128out: 188 kfree(path);
129 mutex_unlock(&dirlock);
130 return err; 189 return err;
131} 190}
132 191
133int devtmpfs_create_node(struct device *dev) 192static int handle_create(const char *nodename, mode_t mode, struct device *dev)
134{ 193{
135 const char *tmp = NULL;
136 const char *nodename;
137 const struct cred *curr_cred;
138 mode_t mode = 0;
139 struct nameidata nd;
140 struct dentry *dentry; 194 struct dentry *dentry;
195 struct path path;
141 int err; 196 int err;
142 197
143 if (!dev_mnt) 198 dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
144 return 0; 199 if (dentry == ERR_PTR(-ENOENT)) {
145
146 nodename = device_get_devnode(dev, &mode, &tmp);
147 if (!nodename)
148 return -ENOMEM;
149
150 if (mode == 0)
151 mode = 0600;
152 if (is_blockdev(dev))
153 mode |= S_IFBLK;
154 else
155 mode |= S_IFCHR;
156
157 curr_cred = override_creds(&init_cred);
158
159 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
160 nodename, LOOKUP_PARENT, &nd);
161 if (err == -ENOENT) {
162 create_path(nodename); 200 create_path(nodename);
163 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 201 dentry = kern_path_create(AT_FDCWD, nodename, &path, 0);
164 nodename, LOOKUP_PARENT, &nd);
165 } 202 }
166 if (err) 203 if (IS_ERR(dentry))
167 goto out; 204 return PTR_ERR(dentry);
168 205
169 dentry = lookup_create(&nd, 0); 206 err = vfs_mknod(path.dentry->d_inode,
170 if (!IS_ERR(dentry)) { 207 dentry, mode, dev->devt);
171 err = vfs_mknod(nd.path.dentry->d_inode, 208 if (!err) {
172 dentry, mode, dev->devt); 209 struct iattr newattrs;
173 if (!err) { 210
174 struct iattr newattrs; 211 /* fixup possibly umasked mode */
175 212 newattrs.ia_mode = mode;
176 /* fixup possibly umasked mode */ 213 newattrs.ia_valid = ATTR_MODE;
177 newattrs.ia_mode = mode; 214 mutex_lock(&dentry->d_inode->i_mutex);
178 newattrs.ia_valid = ATTR_MODE; 215 notify_change(dentry, &newattrs);
179 mutex_lock(&dentry->d_inode->i_mutex); 216 mutex_unlock(&dentry->d_inode->i_mutex);
180 notify_change(dentry, &newattrs); 217
181 mutex_unlock(&dentry->d_inode->i_mutex); 218 /* mark as kernel-created inode */
182 219 dentry->d_inode->i_private = &thread;
183 /* mark as kernel-created inode */
184 dentry->d_inode->i_private = &dev_mnt;
185 }
186 dput(dentry);
187 } else {
188 err = PTR_ERR(dentry);
189 } 220 }
221 dput(dentry);
190 222
191 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 223 mutex_unlock(&path.dentry->d_inode->i_mutex);
192 path_put(&nd.path); 224 path_put(&path);
193out:
194 kfree(tmp);
195 revert_creds(curr_cred);
196 return err; 225 return err;
197} 226}
198 227
@@ -202,8 +231,7 @@ static int dev_rmdir(const char *name)
202 struct dentry *dentry; 231 struct dentry *dentry;
203 int err; 232 int err;
204 233
205 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt, 234 err = kern_path_parent(name, &nd);
206 name, LOOKUP_PARENT, &nd);
207 if (err) 235 if (err)
208 return err; 236 return err;
209 237
@@ -211,7 +239,7 @@ static int dev_rmdir(const char *name)
211 dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); 239 dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
212 if (!IS_ERR(dentry)) { 240 if (!IS_ERR(dentry)) {
213 if (dentry->d_inode) { 241 if (dentry->d_inode) {
214 if (dentry->d_inode->i_private == &dev_mnt) 242 if (dentry->d_inode->i_private == &thread)
215 err = vfs_rmdir(nd.path.dentry->d_inode, 243 err = vfs_rmdir(nd.path.dentry->d_inode,
216 dentry); 244 dentry);
217 else 245 else
@@ -238,7 +266,6 @@ static int delete_path(const char *nodepath)
238 if (!path) 266 if (!path)
239 return -ENOMEM; 267 return -ENOMEM;
240 268
241 mutex_lock(&dirlock);
242 for (;;) { 269 for (;;) {
243 char *base; 270 char *base;
244 271
@@ -250,7 +277,6 @@ static int delete_path(const char *nodepath)
250 if (err) 277 if (err)
251 break; 278 break;
252 } 279 }
253 mutex_unlock(&dirlock);
254 280
255 kfree(path); 281 kfree(path);
256 return err; 282 return err;
@@ -259,7 +285,7 @@ static int delete_path(const char *nodepath)
259static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat) 285static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *stat)
260{ 286{
261 /* did we create it */ 287 /* did we create it */
262 if (inode->i_private != &dev_mnt) 288 if (inode->i_private != &thread)
263 return 0; 289 return 0;
264 290
265 /* does the dev_t match */ 291 /* does the dev_t match */
@@ -277,29 +303,17 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta
277 return 1; 303 return 1;
278} 304}
279 305
280int devtmpfs_delete_node(struct device *dev) 306static int handle_remove(const char *nodename, struct device *dev)
281{ 307{
282 const char *tmp = NULL;
283 const char *nodename;
284 const struct cred *curr_cred;
285 struct nameidata nd; 308 struct nameidata nd;
286 struct dentry *dentry; 309 struct dentry *dentry;
287 struct kstat stat; 310 struct kstat stat;
288 int deleted = 1; 311 int deleted = 1;
289 int err; 312 int err;
290 313
291 if (!dev_mnt) 314 err = kern_path_parent(nodename, &nd);
292 return 0;
293
294 nodename = device_get_devnode(dev, NULL, &tmp);
295 if (!nodename)
296 return -ENOMEM;
297
298 curr_cred = override_creds(&init_cred);
299 err = vfs_path_lookup(dev_mnt->mnt_root, dev_mnt,
300 nodename, LOOKUP_PARENT, &nd);
301 if (err) 315 if (err)
302 goto out; 316 return err;
303 317
304 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 318 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
305 dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); 319 dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len);
@@ -337,9 +351,6 @@ int devtmpfs_delete_node(struct device *dev)
337 path_put(&nd.path); 351 path_put(&nd.path);
338 if (deleted && strchr(nodename, '/')) 352 if (deleted && strchr(nodename, '/'))
339 delete_path(nodename); 353 delete_path(nodename);
340out:
341 kfree(tmp);
342 revert_creds(curr_cred);
343 return err; 354 return err;
344} 355}
345 356
@@ -354,7 +365,7 @@ int devtmpfs_mount(const char *mntdir)
354 if (!mount_dev) 365 if (!mount_dev)
355 return 0; 366 return 0;
356 367
357 if (!dev_mnt) 368 if (!thread)
358 return 0; 369 return 0;
359 370
360 err = sys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, NULL); 371 err = sys_mount("devtmpfs", (char *)mntdir, "devtmpfs", MS_SILENT, NULL);
@@ -365,31 +376,79 @@ int devtmpfs_mount(const char *mntdir)
365 return err; 376 return err;
366} 377}
367 378
379static __initdata DECLARE_COMPLETION(setup_done);
380
381static int handle(const char *name, mode_t mode, struct device *dev)
382{
383 if (mode)
384 return handle_create(name, mode, dev);
385 else
386 return handle_remove(name, dev);
387}
388
389static int devtmpfsd(void *p)
390{
391 char options[] = "mode=0755";
392 int *err = p;
393 *err = sys_unshare(CLONE_NEWNS);
394 if (*err)
395 goto out;
396 *err = sys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, options);
397 if (*err)
398 goto out;
399 sys_chdir("/.."); /* will traverse into overmounted root */
400 sys_chroot(".");
401 complete(&setup_done);
402 while (1) {
403 spin_lock(&req_lock);
404 while (requests) {
405 struct req *req = requests;
406 requests = NULL;
407 spin_unlock(&req_lock);
408 while (req) {
409 req->err = handle(req->name, req->mode, req->dev);
410 complete(&req->done);
411 req = req->next;
412 }
413 spin_lock(&req_lock);
414 }
415 set_current_state(TASK_INTERRUPTIBLE);
416 spin_unlock(&req_lock);
417 schedule();
418 __set_current_state(TASK_RUNNING);
419 }
420 return 0;
421out:
422 complete(&setup_done);
423 return *err;
424}
425
368/* 426/*
369 * Create devtmpfs instance, driver-core devices will add their device 427 * Create devtmpfs instance, driver-core devices will add their device
370 * nodes here. 428 * nodes here.
371 */ 429 */
372int __init devtmpfs_init(void) 430int __init devtmpfs_init(void)
373{ 431{
374 int err; 432 int err = register_filesystem(&dev_fs_type);
375 struct vfsmount *mnt;
376 char options[] = "mode=0755";
377
378 err = register_filesystem(&dev_fs_type);
379 if (err) { 433 if (err) {
380 printk(KERN_ERR "devtmpfs: unable to register devtmpfs " 434 printk(KERN_ERR "devtmpfs: unable to register devtmpfs "
381 "type %i\n", err); 435 "type %i\n", err);
382 return err; 436 return err;
383 } 437 }
384 438
385 mnt = kern_mount_data(&dev_fs_type, options); 439 thread = kthread_run(devtmpfsd, &err, "kdevtmpfs");
386 if (IS_ERR(mnt)) { 440 if (!IS_ERR(thread)) {
387 err = PTR_ERR(mnt); 441 wait_for_completion(&setup_done);
442 } else {
443 err = PTR_ERR(thread);
444 thread = NULL;
445 }
446
447 if (err) {
388 printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err); 448 printk(KERN_ERR "devtmpfs: unable to create devtmpfs %i\n", err);
389 unregister_filesystem(&dev_fs_type); 449 unregister_filesystem(&dev_fs_type);
390 return err; 450 return err;
391 } 451 }
392 dev_mnt = mnt;
393 452
394 printk(KERN_INFO "devtmpfs: initialized\n"); 453 printk(KERN_INFO "devtmpfs: initialized\n");
395 return 0; 454 return 0;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 07a382eaf0a8..e133f094ab08 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1206,7 +1206,7 @@ static int pkt_start_recovery(struct packet_data *pkt)
1206 if (!sb) 1206 if (!sb)
1207 return 0; 1207 return 0;
1208 1208
1209 if (!sb->s_op || !sb->s_op->relocate_blocks) 1209 if (!sb->s_op->relocate_blocks)
1210 goto out; 1210 goto out;
1211 1211
1212 old_block = pkt->sector / (CD_FRAMESIZE >> 9); 1212 old_block = pkt->sector / (CD_FRAMESIZE >> 9);
diff --git a/drivers/char/generic_nvram.c b/drivers/char/generic_nvram.c
index 0e941b57482e..6c4f4b5a9dd3 100644
--- a/drivers/char/generic_nvram.c
+++ b/drivers/char/generic_nvram.c
@@ -34,12 +34,16 @@ static ssize_t nvram_len;
34static loff_t nvram_llseek(struct file *file, loff_t offset, int origin) 34static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
35{ 35{
36 switch (origin) { 36 switch (origin) {
37 case 0:
38 break;
37 case 1: 39 case 1:
38 offset += file->f_pos; 40 offset += file->f_pos;
39 break; 41 break;
40 case 2: 42 case 2:
41 offset += nvram_len; 43 offset += nvram_len;
42 break; 44 break;
45 default:
46 offset = -1;
43 } 47 }
44 if (offset < 0) 48 if (offset < 0)
45 return -EINVAL; 49 return -EINVAL;
diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
index 166f1e7aaa7e..da3cfee782dc 100644
--- a/drivers/char/nvram.c
+++ b/drivers/char/nvram.c
@@ -224,6 +224,8 @@ static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
224 case 2: 224 case 2:
225 offset += NVRAM_BYTES; 225 offset += NVRAM_BYTES;
226 break; 226 break;
227 default:
228 return -EINVAL;
227 } 229 }
228 230
229 return (offset >= 0) ? (file->f_pos = offset) : -EINVAL; 231 return (offset >= 0) ? (file->f_pos = offset) : -EINVAL;
diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c
index 85c004a518ee..d0c57c2e2909 100644
--- a/drivers/char/ps3flash.c
+++ b/drivers/char/ps3flash.c
@@ -101,12 +101,16 @@ static loff_t ps3flash_llseek(struct file *file, loff_t offset, int origin)
101 101
102 mutex_lock(&file->f_mapping->host->i_mutex); 102 mutex_lock(&file->f_mapping->host->i_mutex);
103 switch (origin) { 103 switch (origin) {
104 case 0:
105 break;
104 case 1: 106 case 1:
105 offset += file->f_pos; 107 offset += file->f_pos;
106 break; 108 break;
107 case 2: 109 case 2:
108 offset += dev->regions[dev->region_idx].size*dev->blk_size; 110 offset += dev->regions[dev->region_idx].size*dev->blk_size;
109 break; 111 break;
112 default:
113 offset = -1;
110 } 114 }
111 if (offset < 0) { 115 if (offset < 0) {
112 res = -EINVAL; 116 res = -EINVAL;
@@ -305,9 +309,14 @@ static int ps3flash_flush(struct file *file, fl_owner_t id)
305 return ps3flash_writeback(ps3flash_dev); 309 return ps3flash_writeback(ps3flash_dev);
306} 310}
307 311
308static int ps3flash_fsync(struct file *file, int datasync) 312static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datasync)
309{ 313{
310 return ps3flash_writeback(ps3flash_dev); 314 struct inode *inode = file->f_path.dentry->d_inode;
315 int err;
316 mutex_lock(&inode->i_mutex);
317 err = ps3flash_writeback(ps3flash_dev);
318 mutex_unlock(&inode->i_mutex);
319 return err;
311} 320}
312 321
313static irqreturn_t ps3flash_interrupt(int irq, void *data) 322static irqreturn_t ps3flash_interrupt(int irq, void *data)
diff --git a/drivers/macintosh/nvram.c b/drivers/macintosh/nvram.c
index a271c8218d82..f0e03e7937e3 100644
--- a/drivers/macintosh/nvram.c
+++ b/drivers/macintosh/nvram.c
@@ -21,12 +21,16 @@
21static loff_t nvram_llseek(struct file *file, loff_t offset, int origin) 21static loff_t nvram_llseek(struct file *file, loff_t offset, int origin)
22{ 22{
23 switch (origin) { 23 switch (origin) {
24 case 0:
25 break;
24 case 1: 26 case 1:
25 offset += file->f_pos; 27 offset += file->f_pos;
26 break; 28 break;
27 case 2: 29 case 2:
28 offset += NVRAM_SIZE; 30 offset += NVRAM_SIZE;
29 break; 31 break;
32 default:
33 offset = -1;
30 } 34 }
31 if (offset < 0) 35 if (offset < 0)
32 return -EINVAL; 36 return -EINVAL;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 91e31e260b4a..dfc9425db70b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6394,16 +6394,11 @@ static void md_seq_stop(struct seq_file *seq, void *v)
6394 mddev_put(mddev); 6394 mddev_put(mddev);
6395} 6395}
6396 6396
6397struct mdstat_info {
6398 int event;
6399};
6400
6401static int md_seq_show(struct seq_file *seq, void *v) 6397static int md_seq_show(struct seq_file *seq, void *v)
6402{ 6398{
6403 mddev_t *mddev = v; 6399 mddev_t *mddev = v;
6404 sector_t sectors; 6400 sector_t sectors;
6405 mdk_rdev_t *rdev; 6401 mdk_rdev_t *rdev;
6406 struct mdstat_info *mi = seq->private;
6407 struct bitmap *bitmap; 6402 struct bitmap *bitmap;
6408 6403
6409 if (v == (void*)1) { 6404 if (v == (void*)1) {
@@ -6415,7 +6410,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
6415 6410
6416 spin_unlock(&pers_lock); 6411 spin_unlock(&pers_lock);
6417 seq_printf(seq, "\n"); 6412 seq_printf(seq, "\n");
6418 mi->event = atomic_read(&md_event_count); 6413 seq->poll_event = atomic_read(&md_event_count);
6419 return 0; 6414 return 0;
6420 } 6415 }
6421 if (v == (void*)2) { 6416 if (v == (void*)2) {
@@ -6527,26 +6522,21 @@ static const struct seq_operations md_seq_ops = {
6527 6522
6528static int md_seq_open(struct inode *inode, struct file *file) 6523static int md_seq_open(struct inode *inode, struct file *file)
6529{ 6524{
6525 struct seq_file *seq;
6530 int error; 6526 int error;
6531 struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
6532 if (mi == NULL)
6533 return -ENOMEM;
6534 6527
6535 error = seq_open(file, &md_seq_ops); 6528 error = seq_open(file, &md_seq_ops);
6536 if (error) 6529 if (error)
6537 kfree(mi); 6530 return error;
6538 else { 6531
6539 struct seq_file *p = file->private_data; 6532 seq = file->private_data;
6540 p->private = mi; 6533 seq->poll_event = atomic_read(&md_event_count);
6541 mi->event = atomic_read(&md_event_count);
6542 }
6543 return error; 6534 return error;
6544} 6535}
6545 6536
6546static unsigned int mdstat_poll(struct file *filp, poll_table *wait) 6537static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
6547{ 6538{
6548 struct seq_file *m = filp->private_data; 6539 struct seq_file *seq = filp->private_data;
6549 struct mdstat_info *mi = m->private;
6550 int mask; 6540 int mask;
6551 6541
6552 poll_wait(filp, &md_event_waiters, wait); 6542 poll_wait(filp, &md_event_waiters, wait);
@@ -6554,7 +6544,7 @@ static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
6554 /* always allow read */ 6544 /* always allow read */
6555 mask = POLLIN | POLLRDNORM; 6545 mask = POLLIN | POLLRDNORM;
6556 6546
6557 if (mi->event != atomic_read(&md_event_count)) 6547 if (seq->poll_event != atomic_read(&md_event_count))
6558 mask |= POLLERR | POLLPRI; 6548 mask |= POLLERR | POLLPRI;
6559 return mask; 6549 return mask;
6560} 6550}
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 191f3bb3c41a..3320a50ba4f0 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -189,12 +189,16 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin)
189 return new_offset; 189 return new_offset;
190} 190}
191 191
192static int vol_cdev_fsync(struct file *file, int datasync) 192static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, int datasync)
193{ 193{
194 struct ubi_volume_desc *desc = file->private_data; 194 struct ubi_volume_desc *desc = file->private_data;
195 struct ubi_device *ubi = desc->vol->ubi; 195 struct ubi_device *ubi = desc->vol->ubi;
196 196 struct inode *inode = file->f_path.dentry->d_inode;
197 return ubi_sync(ubi->ubi_num); 197 int err;
198 mutex_lock(&inode->i_mutex);
199 err = ubi_sync(ubi->ubi_num);
200 mutex_unlock(&inode->i_mutex);
201 return err;
198} 202}
199 203
200 204
diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c
index 7e9c39951ecb..d6702e57d428 100644
--- a/drivers/sh/clk/core.c
+++ b/drivers/sh/clk/core.c
@@ -670,7 +670,7 @@ static struct dentry *clk_debugfs_root;
670static int clk_debugfs_register_one(struct clk *c) 670static int clk_debugfs_register_one(struct clk *c)
671{ 671{
672 int err; 672 int err;
673 struct dentry *d, *child, *child_tmp; 673 struct dentry *d;
674 struct clk *pa = c->parent; 674 struct clk *pa = c->parent;
675 char s[255]; 675 char s[255];
676 char *p = s; 676 char *p = s;
@@ -699,10 +699,7 @@ static int clk_debugfs_register_one(struct clk *c)
699 return 0; 699 return 0;
700 700
701err_out: 701err_out:
702 d = c->dentry; 702 debugfs_remove_recursive(c->dentry);
703 list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
704 debugfs_remove(child);
705 debugfs_remove(c->dentry);
706 return err; 703 return err;
707} 704}
708 705
diff --git a/drivers/staging/pohmelfs/dir.c b/drivers/staging/pohmelfs/dir.c
index 9732a9666cc4..7598e77672a5 100644
--- a/drivers/staging/pohmelfs/dir.c
+++ b/drivers/staging/pohmelfs/dir.c
@@ -512,7 +512,7 @@ struct dentry *pohmelfs_lookup(struct inode *dir, struct dentry *dentry, struct
512 int err, lock_type = POHMELFS_READ_LOCK, need_lock = 1; 512 int err, lock_type = POHMELFS_READ_LOCK, need_lock = 1;
513 struct qstr str = dentry->d_name; 513 struct qstr str = dentry->d_name;
514 514
515 if ((nd->intent.open.flags & O_ACCMODE) > 1) 515 if ((nd->intent.open.flags & O_ACCMODE) != O_RDONLY)
516 lock_type = POHMELFS_WRITE_LOCK; 516 lock_type = POHMELFS_WRITE_LOCK;
517 517
518 if (test_bit(NETFS_INODE_OWNED, &parent->state)) { 518 if (test_bit(NETFS_INODE_OWNED, &parent->state)) {
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c
index c0f0ac7c1cdb..f3c6060c96b8 100644
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -887,11 +887,16 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb)
887/* 887/*
888 * We want fsync() to work on POHMELFS. 888 * We want fsync() to work on POHMELFS.
889 */ 889 */
890static int pohmelfs_fsync(struct file *file, int datasync) 890static int pohmelfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
891{ 891{
892 struct inode *inode = file->f_mapping->host; 892 struct inode *inode = file->f_mapping->host;
893 893 int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
894 return sync_inode_metadata(inode, 1); 894 if (!err) {
895 mutex_lock(&inode->i_mutex);
896 err = sync_inode_metadata(inode, 1);
897 mutex_unlock(&inode->i_mutex);
898 }
899 return err;
895} 900}
896 901
897ssize_t pohmelfs_write(struct file *file, const char __user *buf, 902ssize_t pohmelfs_write(struct file *file, const char __user *buf,
diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c
index 271ef94668e7..978e6a101bf2 100644
--- a/drivers/usb/gadget/printer.c
+++ b/drivers/usb/gadget/printer.c
@@ -795,12 +795,14 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
795} 795}
796 796
797static int 797static int
798printer_fsync(struct file *fd, int datasync) 798printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
799{ 799{
800 struct printer_dev *dev = fd->private_data; 800 struct printer_dev *dev = fd->private_data;
801 struct inode *inode = fd->f_path.dentry->d_inode;
801 unsigned long flags; 802 unsigned long flags;
802 int tx_list_empty; 803 int tx_list_empty;
803 804
805 mutex_lock(&inode->i_mutex);
804 spin_lock_irqsave(&dev->lock, flags); 806 spin_lock_irqsave(&dev->lock, flags);
805 tx_list_empty = (likely(list_empty(&dev->tx_reqs))); 807 tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
806 spin_unlock_irqrestore(&dev->lock, flags); 808 spin_unlock_irqrestore(&dev->lock, flags);
@@ -810,6 +812,7 @@ printer_fsync(struct file *fd, int datasync)
810 wait_event_interruptible(dev->tx_flush_wait, 812 wait_event_interruptible(dev->tx_flush_wait,
811 (likely(list_empty(&dev->tx_reqs_active)))); 813 (likely(list_empty(&dev->tx_reqs_active))));
812 } 814 }
815 mutex_unlock(&inode->i_mutex);
813 816
814 return 0; 817 return 0;
815} 818}
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 804000183c5e..32814e8800e0 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -66,19 +66,26 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma,
66 return 0; 66 return 0;
67} 67}
68 68
69int fb_deferred_io_fsync(struct file *file, int datasync) 69int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync)
70{ 70{
71 struct fb_info *info = file->private_data; 71 struct fb_info *info = file->private_data;
72 struct inode *inode = file->f_path.dentry->d_inode;
73 int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
74 if (err)
75 return err;
72 76
73 /* Skip if deferred io is compiled-in but disabled on this fbdev */ 77 /* Skip if deferred io is compiled-in but disabled on this fbdev */
74 if (!info->fbdefio) 78 if (!info->fbdefio)
75 return 0; 79 return 0;
76 80
81 mutex_lock(&inode->i_mutex);
77 /* Kill off the delayed work */ 82 /* Kill off the delayed work */
78 cancel_delayed_work_sync(&info->deferred_work); 83 cancel_delayed_work_sync(&info->deferred_work);
79 84
80 /* Run it immediately */ 85 /* Run it immediately */
81 return schedule_delayed_work(&info->deferred_work, 0); 86 err = schedule_delayed_work(&info->deferred_work, 0);
87 mutex_unlock(&inode->i_mutex);
88 return err;
82} 89}
83EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); 90EXPORT_SYMBOL_GPL(fb_deferred_io_fsync);
84 91
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 535ab6eccb1a..e98f56d3787d 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -96,12 +96,12 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
96 return acl; 96 return acl;
97} 97}
98 98
99int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags) 99int v9fs_check_acl(struct inode *inode, int mask)
100{ 100{
101 struct posix_acl *acl; 101 struct posix_acl *acl;
102 struct v9fs_session_info *v9ses; 102 struct v9fs_session_info *v9ses;
103 103
104 if (flags & IPERM_FLAG_RCU) 104 if (mask & MAY_NOT_BLOCK)
105 return -ECHILD; 105 return -ECHILD;
106 106
107 v9ses = v9fs_inode2v9ses(inode); 107 v9ses = v9fs_inode2v9ses(inode);
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 7ef3ac9f6d95..59e18c2e8c7e 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
16 16
17#ifdef CONFIG_9P_FS_POSIX_ACL 17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *); 18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags); 19extern int v9fs_check_acl(struct inode *inode, int mask);
20extern int v9fs_acl_chmod(struct dentry *); 20extern int v9fs_acl_chmod(struct dentry *);
21extern int v9fs_set_create_acl(struct dentry *, 21extern int v9fs_set_create_acl(struct dentry *,
22 struct posix_acl *, struct posix_acl *); 22 struct posix_acl *, struct posix_acl *);
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 4014160903a9..46ce357ca1ab 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -70,7 +70,8 @@ ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64); 70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
71void v9fs_blank_wstat(struct p9_wstat *wstat); 71void v9fs_blank_wstat(struct p9_wstat *wstat);
72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); 72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
73int v9fs_file_fsync_dotl(struct file *filp, int datasync); 73int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
74 int datasync);
74ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *, 75ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
75 const char __user *, size_t, loff_t *, int); 76 const char __user *, size_t, loff_t *, int);
76int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); 77int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index ffed55817f0c..3c173fcc2c5a 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -519,32 +519,50 @@ out:
519} 519}
520 520
521 521
522static int v9fs_file_fsync(struct file *filp, int datasync) 522static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
523 int datasync)
523{ 524{
524 struct p9_fid *fid; 525 struct p9_fid *fid;
526 struct inode *inode = filp->f_mapping->host;
525 struct p9_wstat wstat; 527 struct p9_wstat wstat;
526 int retval; 528 int retval;
527 529
530 retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
531 if (retval)
532 return retval;
533
534 mutex_lock(&inode->i_mutex);
528 P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); 535 P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
529 536
530 fid = filp->private_data; 537 fid = filp->private_data;
531 v9fs_blank_wstat(&wstat); 538 v9fs_blank_wstat(&wstat);
532 539
533 retval = p9_client_wstat(fid, &wstat); 540 retval = p9_client_wstat(fid, &wstat);
541 mutex_unlock(&inode->i_mutex);
542
534 return retval; 543 return retval;
535} 544}
536 545
537int v9fs_file_fsync_dotl(struct file *filp, int datasync) 546int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
547 int datasync)
538{ 548{
539 struct p9_fid *fid; 549 struct p9_fid *fid;
550 struct inode *inode = filp->f_mapping->host;
540 int retval; 551 int retval;
541 552
553 retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
554 if (retval)
555 return retval;
556
557 mutex_lock(&inode->i_mutex);
542 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", 558 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
543 filp, datasync); 559 filp, datasync);
544 560
545 fid = filp->private_data; 561 fid = filp->private_data;
546 562
547 retval = p9_client_fsync(fid, datasync); 563 retval = p9_client_fsync(fid, datasync);
564 mutex_unlock(&inode->i_mutex);
565
548 return retval; 566 return retval;
549} 567}
550 568
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7f6c67703195..7f9976a866e9 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -633,8 +633,8 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
633 fid = NULL; 633 fid = NULL;
634 v9ses = v9fs_inode2v9ses(dir); 634 v9ses = v9fs_inode2v9ses(dir);
635 perm = unixmode2p9mode(v9ses, mode); 635 perm = unixmode2p9mode(v9ses, mode);
636 if (nd && nd->flags & LOOKUP_OPEN) 636 if (nd)
637 flags = nd->intent.open.flags - 1; 637 flags = nd->intent.open.flags;
638 else 638 else
639 flags = O_RDWR; 639 flags = O_RDWR;
640 640
@@ -649,7 +649,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
649 649
650 v9fs_invalidate_inode_attr(dir); 650 v9fs_invalidate_inode_attr(dir);
651 /* if we are opening a file, assign the open fid to the file */ 651 /* if we are opening a file, assign the open fid to the file */
652 if (nd && nd->flags & LOOKUP_OPEN) { 652 if (nd) {
653 v9inode = V9FS_I(dentry->d_inode); 653 v9inode = V9FS_I(dentry->d_inode);
654 mutex_lock(&v9inode->v_mutex); 654 mutex_lock(&v9inode->v_mutex);
655 if (v9ses->cache && !v9inode->writeback_fid && 655 if (v9ses->cache && !v9inode->writeback_fid &&
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 691c78f58bef..32bbbe5aa689 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -173,8 +173,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
173 struct posix_acl *pacl = NULL, *dacl = NULL; 173 struct posix_acl *pacl = NULL, *dacl = NULL;
174 174
175 v9ses = v9fs_inode2v9ses(dir); 175 v9ses = v9fs_inode2v9ses(dir);
176 if (nd && nd->flags & LOOKUP_OPEN) 176 if (nd)
177 flags = nd->intent.open.flags - 1; 177 flags = nd->intent.open.flags;
178 else { 178 else {
179 /* 179 /*
180 * create call without LOOKUP_OPEN is due 180 * create call without LOOKUP_OPEN is due
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0e95f73a7023..c2b9c79eb64e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,7 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
182 182
183void affs_free_prealloc(struct inode *inode); 183void affs_free_prealloc(struct inode *inode);
184extern void affs_truncate(struct inode *); 184extern void affs_truncate(struct inode *);
185int affs_file_fsync(struct file *, int); 185int affs_file_fsync(struct file *, loff_t, loff_t, int);
186 186
187/* dir.c */ 187/* dir.c */
188 188
diff --git a/fs/affs/file.c b/fs/affs/file.c
index acf321b70fcd..2f4c935cb327 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -923,14 +923,20 @@ affs_truncate(struct inode *inode)
923 affs_free_prealloc(inode); 923 affs_free_prealloc(inode);
924} 924}
925 925
926int affs_file_fsync(struct file *filp, int datasync) 926int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
927{ 927{
928 struct inode *inode = filp->f_mapping->host; 928 struct inode *inode = filp->f_mapping->host;
929 int ret, err; 929 int ret, err;
930 930
931 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
932 if (err)
933 return err;
934
935 mutex_lock(&inode->i_mutex);
931 ret = write_inode_now(inode, 0); 936 ret = write_inode_now(inode, 0);
932 err = sync_blockdev(inode->i_sb->s_bdev); 937 err = sync_blockdev(inode->i_sb->s_bdev);
933 if (!ret) 938 if (!ret)
934 ret = err; 939 ret = err;
940 mutex_unlock(&inode->i_mutex);
935 return ret; 941 return ret;
936} 942}
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 8bbefe009ed4..800f607ffaf5 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -49,7 +49,7 @@ enum AFSVL_Errors {
49 AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */ 49 AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */
50 AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */ 50 AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */
51 AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */ 51 AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */
52 AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server °ag */ 52 AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */
53 AFSVL_PERM = 363546, /* No permission access */ 53 AFSVL_PERM = 363546, /* No permission access */
54 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */ 54 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
55}; 55};
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5a9b6843bac1..d2b0888126d4 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -627,7 +627,7 @@ extern void afs_clear_permits(struct afs_vnode *);
627extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 627extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
628extern void afs_zap_permits(struct rcu_head *); 628extern void afs_zap_permits(struct rcu_head *);
629extern struct key *afs_request_key(struct afs_cell *); 629extern struct key *afs_request_key(struct afs_cell *);
630extern int afs_permission(struct inode *, int, unsigned int); 630extern int afs_permission(struct inode *, int);
631 631
632/* 632/*
633 * server.c 633 * server.c
@@ -750,7 +750,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
750extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, 750extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
751 unsigned long, loff_t); 751 unsigned long, loff_t);
752extern int afs_writeback_all(struct afs_vnode *); 752extern int afs_writeback_all(struct afs_vnode *);
753extern int afs_fsync(struct file *, int); 753extern int afs_fsync(struct file *, loff_t, loff_t, int);
754 754
755 755
756/*****************************************************************************/ 756/*****************************************************************************/
diff --git a/fs/afs/security.c b/fs/afs/security.c
index f44b9d355377..8d010422dc89 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,14 +285,14 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
285 * - AFS ACLs are attached to directories only, and a file is controlled by its 285 * - AFS ACLs are attached to directories only, and a file is controlled by its
286 * parent directory's ACL 286 * parent directory's ACL
287 */ 287 */
288int afs_permission(struct inode *inode, int mask, unsigned int flags) 288int afs_permission(struct inode *inode, int mask)
289{ 289{
290 struct afs_vnode *vnode = AFS_FS_I(inode); 290 struct afs_vnode *vnode = AFS_FS_I(inode);
291 afs_access_t uninitialized_var(access); 291 afs_access_t uninitialized_var(access);
292 struct key *key; 292 struct key *key;
293 int ret; 293 int ret;
294 294
295 if (flags & IPERM_FLAG_RCU) 295 if (mask & MAY_NOT_BLOCK)
296 return -ECHILD; 296 return -ECHILD;
297 297
298 _enter("{{%x:%u},%lx},%x,", 298 _enter("{{%x:%u},%lx},%x,",
@@ -350,7 +350,7 @@ int afs_permission(struct inode *inode, int mask, unsigned int flags)
350 } 350 }
351 351
352 key_put(key); 352 key_put(key);
353 ret = generic_permission(inode, mask, flags, NULL); 353 ret = generic_permission(inode, mask);
354 _leave(" = %d", ret); 354 _leave(" = %d", ret);
355 return ret; 355 return ret;
356 356
diff --git a/fs/afs/write.c b/fs/afs/write.c
index b806285ff853..9aa52d93c73c 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -681,9 +681,10 @@ int afs_writeback_all(struct afs_vnode *vnode)
681 * - the return status from this call provides a reliable indication of 681 * - the return status from this call provides a reliable indication of
682 * whether any write errors occurred for this process. 682 * whether any write errors occurred for this process.
683 */ 683 */
684int afs_fsync(struct file *file, int datasync) 684int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
685{ 685{
686 struct dentry *dentry = file->f_path.dentry; 686 struct dentry *dentry = file->f_path.dentry;
687 struct inode *inode = file->f_mapping->host;
687 struct afs_writeback *wb, *xwb; 688 struct afs_writeback *wb, *xwb;
688 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); 689 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
689 int ret; 690 int ret;
@@ -692,12 +693,19 @@ int afs_fsync(struct file *file, int datasync)
692 vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, 693 vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
693 datasync); 694 datasync);
694 695
696 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
697 if (ret)
698 return ret;
699 mutex_lock(&inode->i_mutex);
700
695 /* use a writeback record as a marker in the queue - when this reaches 701 /* use a writeback record as a marker in the queue - when this reaches
696 * the front of the queue, all the outstanding writes are either 702 * the front of the queue, all the outstanding writes are either
697 * completed or rejected */ 703 * completed or rejected */
698 wb = kzalloc(sizeof(*wb), GFP_KERNEL); 704 wb = kzalloc(sizeof(*wb), GFP_KERNEL);
699 if (!wb) 705 if (!wb) {
700 return -ENOMEM; 706 ret = -ENOMEM;
707 goto out;
708 }
701 wb->vnode = vnode; 709 wb->vnode = vnode;
702 wb->first = 0; 710 wb->first = 0;
703 wb->last = -1; 711 wb->last = -1;
@@ -720,7 +728,7 @@ int afs_fsync(struct file *file, int datasync)
720 if (ret < 0) { 728 if (ret < 0) {
721 afs_put_writeback(wb); 729 afs_put_writeback(wb);
722 _leave(" = %d [wb]", ret); 730 _leave(" = %d [wb]", ret);
723 return ret; 731 goto out;
724 } 732 }
725 733
726 /* wait for the preceding writes to actually complete */ 734 /* wait for the preceding writes to actually complete */
@@ -729,6 +737,8 @@ int afs_fsync(struct file *file, int datasync)
729 vnode->writebacks.next == &wb->link); 737 vnode->writebacks.next == &wb->link);
730 afs_put_writeback(wb); 738 afs_put_writeback(wb);
731 _leave(" = %d", ret); 739 _leave(" = %d", ret);
740out:
741 mutex_unlock(&inode->i_mutex);
732 return ret; 742 return ret;
733} 743}
734 744
diff --git a/fs/attr.c b/fs/attr.c
index caf2aa521e2b..538e27959d3f 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -232,17 +232,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
232 if (error) 232 if (error)
233 return error; 233 return error;
234 234
235 if (ia_valid & ATTR_SIZE)
236 down_write(&dentry->d_inode->i_alloc_sem);
237
238 if (inode->i_op->setattr) 235 if (inode->i_op->setattr)
239 error = inode->i_op->setattr(dentry, attr); 236 error = inode->i_op->setattr(dentry, attr);
240 else 237 else
241 error = simple_setattr(dentry, attr); 238 error = simple_setattr(dentry, attr);
242 239
243 if (ia_valid & ATTR_SIZE)
244 up_write(&dentry->d_inode->i_alloc_sem);
245
246 if (!error) 240 if (!error)
247 fsnotify_change(dentry, ia_valid); 241 fsnotify_change(dentry, ia_valid);
248 242
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index bfcb18feb1df..9205cf25f1c6 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -87,7 +87,8 @@ static int bad_file_release(struct inode *inode, struct file *filp)
87 return -EIO; 87 return -EIO;
88} 88}
89 89
90static int bad_file_fsync(struct file *file, int datasync) 90static int bad_file_fsync(struct file *file, loff_t start, loff_t end,
91 int datasync)
91{ 92{
92 return -EIO; 93 return -EIO;
93} 94}
@@ -229,7 +230,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
229 return -EIO; 230 return -EIO;
230} 231}
231 232
232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) 233static int bad_inode_permission(struct inode *inode, int mask)
233{ 234{
234 return -EIO; 235 return -EIO;
235} 236}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 303983fabfd6..dd0fdfc56d38 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -668,8 +668,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
668 * mm->dumpable = 0 regardless of the interpreter's 668 * mm->dumpable = 0 regardless of the interpreter's
669 * permissions. 669 * permissions.
670 */ 670 */
671 if (file_permission(interpreter, MAY_READ) < 0) 671 would_dump(bprm, interpreter);
672 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
673 672
674 retval = kernel_read(interpreter, 0, bprm->buf, 673 retval = kernel_read(interpreter, 0, bprm->buf,
675 BINPRM_BUF_SIZE); 674 BINPRM_BUF_SIZE);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2bc5dc644b4c..30745f459faf 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -245,8 +245,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
245 * mm->dumpable = 0 regardless of the interpreter's 245 * mm->dumpable = 0 regardless of the interpreter's
246 * permissions. 246 * permissions.
247 */ 247 */
248 if (file_permission(interpreter, MAY_READ) < 0) 248 would_dump(bprm, interpreter);
249 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
250 249
251 retval = kernel_read(interpreter, 0, bprm->buf, 250 retval = kernel_read(interpreter, 0, bprm->buf,
252 BINPRM_BUF_SIZE); 251 BINPRM_BUF_SIZE);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1befe2ec8186..ba1a1ae4a18a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -149,8 +149,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
149 149
150 /* if the binary is not readable than enforce mm->dumpable=0 150 /* if the binary is not readable than enforce mm->dumpable=0
151 regardless of the interpreter's permissions */ 151 regardless of the interpreter's permissions */
152 if (file_permission(bprm->file, MAY_READ)) 152 would_dump(bprm, bprm->file);
153 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
154 153
155 allow_write_access(bprm->file); 154 allow_write_access(bprm->file);
156 bprm->file = NULL; 155 bprm->file = NULL;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 610e8e0b04b8..9fb0b15331d3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -355,25 +355,30 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
355 mutex_lock(&bd_inode->i_mutex); 355 mutex_lock(&bd_inode->i_mutex);
356 size = i_size_read(bd_inode); 356 size = i_size_read(bd_inode);
357 357
358 retval = -EINVAL;
358 switch (origin) { 359 switch (origin) {
359 case 2: 360 case SEEK_END:
360 offset += size; 361 offset += size;
361 break; 362 break;
362 case 1: 363 case SEEK_CUR:
363 offset += file->f_pos; 364 offset += file->f_pos;
365 case SEEK_SET:
366 break;
367 default:
368 goto out;
364 } 369 }
365 retval = -EINVAL;
366 if (offset >= 0 && offset <= size) { 370 if (offset >= 0 && offset <= size) {
367 if (offset != file->f_pos) { 371 if (offset != file->f_pos) {
368 file->f_pos = offset; 372 file->f_pos = offset;
369 } 373 }
370 retval = offset; 374 retval = offset;
371 } 375 }
376out:
372 mutex_unlock(&bd_inode->i_mutex); 377 mutex_unlock(&bd_inode->i_mutex);
373 return retval; 378 return retval;
374} 379}
375 380
376int blkdev_fsync(struct file *filp, int datasync) 381int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
377{ 382{
378 struct inode *bd_inode = filp->f_mapping->host; 383 struct inode *bd_inode = filp->f_mapping->host;
379 struct block_device *bdev = I_BDEV(bd_inode); 384 struct block_device *bdev = I_BDEV(bd_inode);
@@ -384,14 +389,10 @@ int blkdev_fsync(struct file *filp, int datasync)
384 * i_mutex and doing so causes performance issues with concurrent 389 * i_mutex and doing so causes performance issues with concurrent
385 * O_SYNC writers to a block device. 390 * O_SYNC writers to a block device.
386 */ 391 */
387 mutex_unlock(&bd_inode->i_mutex);
388
389 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); 392 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
390 if (error == -EOPNOTSUPP) 393 if (error == -EOPNOTSUPP)
391 error = 0; 394 error = 0;
392 395
393 mutex_lock(&bd_inode->i_mutex);
394
395 return error; 396 return error;
396} 397}
397EXPORT_SYMBOL(blkdev_fsync); 398EXPORT_SYMBOL(blkdev_fsync);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index f66fc9959733..9f62ab2a7282 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -195,14 +195,13 @@ out:
195 return ret; 195 return ret;
196} 196}
197 197
198int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags) 198int btrfs_check_acl(struct inode *inode, int mask)
199{ 199{
200 int error = -EAGAIN; 200 int error = -EAGAIN;
201 201
202 if (flags & IPERM_FLAG_RCU) { 202 if (mask & MAY_NOT_BLOCK) {
203 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 203 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
204 error = -ECHILD; 204 error = -ECHILD;
205
206 } else { 205 } else {
207 struct posix_acl *acl; 206 struct posix_acl *acl;
208 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); 207 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3b859a3e6a0e..82be74efbb26 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1219,7 +1219,7 @@ struct btrfs_root {
1219 * right now this just gets used so that a root has its own devid 1219 * right now this just gets used so that a root has its own devid
1220 * for stat. It may be used for more later 1220 * for stat. It may be used for more later
1221 */ 1221 */
1222 struct super_block anon_super; 1222 dev_t anon_dev;
1223}; 1223};
1224 1224
1225struct btrfs_ioctl_defrag_range_args { 1225struct btrfs_ioctl_defrag_range_args {
@@ -2510,6 +2510,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
2510int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 2510int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
2511 struct list_head *list, int search_commit); 2511 struct list_head *list, int search_commit);
2512/* inode.c */ 2512/* inode.c */
2513struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
2514 size_t pg_offset, u64 start, u64 len,
2515 int create);
2513 2516
2514/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ 2517/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
2515#if defined(ClearPageFsMisc) && !defined(ClearPageChecked) 2518#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
@@ -2602,7 +2605,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
2602int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, 2605int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
2603 struct inode *inode); 2606 struct inode *inode);
2604int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); 2607int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
2605int btrfs_sync_file(struct file *file, int datasync); 2608int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
2606int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 2609int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2607 int skip_pinned); 2610 int skip_pinned);
2608extern const struct file_operations btrfs_file_operations; 2611extern const struct file_operations btrfs_file_operations;
@@ -2642,7 +2645,7 @@ do { \
2642 2645
2643/* acl.c */ 2646/* acl.c */
2644#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2647#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2645int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); 2648int btrfs_check_acl(struct inode *inode, int mask);
2646#else 2649#else
2647#define btrfs_check_acl NULL 2650#define btrfs_check_acl NULL
2648#endif 2651#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1ac8db5dc0a3..b231ae13b269 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1077,12 +1077,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1077 init_completion(&root->kobj_unregister); 1077 init_completion(&root->kobj_unregister);
1078 root->defrag_running = 0; 1078 root->defrag_running = 0;
1079 root->root_key.objectid = objectid; 1079 root->root_key.objectid = objectid;
1080 root->anon_super.s_root = NULL; 1080 root->anon_dev = 0;
1081 root->anon_super.s_dev = 0;
1082 INIT_LIST_HEAD(&root->anon_super.s_list);
1083 INIT_LIST_HEAD(&root->anon_super.s_instances);
1084 init_rwsem(&root->anon_super.s_umount);
1085
1086 return 0; 1081 return 0;
1087} 1082}
1088 1083
@@ -1311,7 +1306,7 @@ again:
1311 spin_lock_init(&root->cache_lock); 1306 spin_lock_init(&root->cache_lock);
1312 init_waitqueue_head(&root->cache_wait); 1307 init_waitqueue_head(&root->cache_wait);
1313 1308
1314 ret = set_anon_super(&root->anon_super, NULL); 1309 ret = get_anon_bdev(&root->anon_dev);
1315 if (ret) 1310 if (ret)
1316 goto fail; 1311 goto fail;
1317 1312
@@ -2393,10 +2388,8 @@ static void free_fs_root(struct btrfs_root *root)
2393{ 2388{
2394 iput(root->cache_inode); 2389 iput(root->cache_inode);
2395 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2390 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2396 if (root->anon_super.s_dev) { 2391 if (root->anon_dev)
2397 down_write(&root->anon_super.s_umount); 2392 free_anon_bdev(root->anon_dev);
2398 kill_anon_super(&root->anon_super);
2399 }
2400 free_extent_buffer(root->node); 2393 free_extent_buffer(root->node);
2401 free_extent_buffer(root->commit_root); 2394 free_extent_buffer(root->commit_root);
2402 kfree(root->free_ino_ctl); 2395 kfree(root->free_ino_ctl);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fa4ef18b66b1..59cbdb120ad0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1452 * important optimization for directories because holding the mutex prevents 1452 * important optimization for directories because holding the mutex prevents
1453 * new operations on the dir while we write to disk. 1453 * new operations on the dir while we write to disk.
1454 */ 1454 */
1455int btrfs_sync_file(struct file *file, int datasync) 1455int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1456{ 1456{
1457 struct dentry *dentry = file->f_path.dentry; 1457 struct dentry *dentry = file->f_path.dentry;
1458 struct inode *inode = dentry->d_inode; 1458 struct inode *inode = dentry->d_inode;
@@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync)
1462 1462
1463 trace_btrfs_sync_file(file, datasync); 1463 trace_btrfs_sync_file(file, datasync);
1464 1464
1465 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1466 if (ret)
1467 return ret;
1468 mutex_lock(&inode->i_mutex);
1469
1465 /* we wait first, since the writeback may change the inode */ 1470 /* we wait first, since the writeback may change the inode */
1466 root->log_batch++; 1471 root->log_batch++;
1467 /* the VFS called filemap_fdatawrite for us */
1468 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1472 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1469 root->log_batch++; 1473 root->log_batch++;
1470 1474
@@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync)
1472 * check the transaction that last modified this inode 1476 * check the transaction that last modified this inode
1473 * and see if its already been committed 1477 * and see if its already been committed
1474 */ 1478 */
1475 if (!BTRFS_I(inode)->last_trans) 1479 if (!BTRFS_I(inode)->last_trans) {
1480 mutex_unlock(&inode->i_mutex);
1476 goto out; 1481 goto out;
1482 }
1477 1483
1478 /* 1484 /*
1479 * if the last transaction that changed this file was before 1485 * if the last transaction that changed this file was before
@@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1484 if (BTRFS_I(inode)->last_trans <= 1490 if (BTRFS_I(inode)->last_trans <=
1485 root->fs_info->last_trans_committed) { 1491 root->fs_info->last_trans_committed) {
1486 BTRFS_I(inode)->last_trans = 0; 1492 BTRFS_I(inode)->last_trans = 0;
1493 mutex_unlock(&inode->i_mutex);
1487 goto out; 1494 goto out;
1488 } 1495 }
1489 1496
@@ -1496,12 +1503,15 @@ int btrfs_sync_file(struct file *file, int datasync)
1496 trans = btrfs_start_transaction(root, 0); 1503 trans = btrfs_start_transaction(root, 0);
1497 if (IS_ERR(trans)) { 1504 if (IS_ERR(trans)) {
1498 ret = PTR_ERR(trans); 1505 ret = PTR_ERR(trans);
1506 mutex_unlock(&inode->i_mutex);
1499 goto out; 1507 goto out;
1500 } 1508 }
1501 1509
1502 ret = btrfs_log_dentry_safe(trans, root, dentry); 1510 ret = btrfs_log_dentry_safe(trans, root, dentry);
1503 if (ret < 0) 1511 if (ret < 0) {
1512 mutex_unlock(&inode->i_mutex);
1504 goto out; 1513 goto out;
1514 }
1505 1515
1506 /* we've logged all the items and now have a consistent 1516 /* we've logged all the items and now have a consistent
1507 * version of the file in the log. It is possible that 1517 * version of the file in the log. It is possible that
@@ -1513,7 +1523,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1513 * file again, but that will end up using the synchronization 1523 * file again, but that will end up using the synchronization
1514 * inside btrfs_sync_log to keep things safe. 1524 * inside btrfs_sync_log to keep things safe.
1515 */ 1525 */
1516 mutex_unlock(&dentry->d_inode->i_mutex); 1526 mutex_unlock(&inode->i_mutex);
1517 1527
1518 if (ret != BTRFS_NO_LOG_SYNC) { 1528 if (ret != BTRFS_NO_LOG_SYNC) {
1519 if (ret > 0) { 1529 if (ret > 0) {
@@ -1528,7 +1538,6 @@ int btrfs_sync_file(struct file *file, int datasync)
1528 } else { 1538 } else {
1529 ret = btrfs_end_transaction(trans, root); 1539 ret = btrfs_end_transaction(trans, root);
1530 } 1540 }
1531 mutex_lock(&dentry->d_inode->i_mutex);
1532out: 1541out:
1533 return ret > 0 ? -EIO : ret; 1542 return ret > 0 ? -EIO : ret;
1534} 1543}
@@ -1664,8 +1673,154 @@ out:
1664 return ret; 1673 return ret;
1665} 1674}
1666 1675
1676static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
1677{
1678 struct btrfs_root *root = BTRFS_I(inode)->root;
1679 struct extent_map *em;
1680 struct extent_state *cached_state = NULL;
1681 u64 lockstart = *offset;
1682 u64 lockend = i_size_read(inode);
1683 u64 start = *offset;
1684 u64 orig_start = *offset;
1685 u64 len = i_size_read(inode);
1686 u64 last_end = 0;
1687 int ret = 0;
1688
1689 lockend = max_t(u64, root->sectorsize, lockend);
1690 if (lockend <= lockstart)
1691 lockend = lockstart + root->sectorsize;
1692
1693 len = lockend - lockstart + 1;
1694
1695 len = max_t(u64, len, root->sectorsize);
1696 if (inode->i_size == 0)
1697 return -ENXIO;
1698
1699 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
1700 &cached_state, GFP_NOFS);
1701
1702 /*
1703 * Delalloc is such a pain. If we have a hole and we have pending
1704 * delalloc for a portion of the hole we will get back a hole that
1705 * exists for the entire range since it hasn't been actually written
1706 * yet. So to take care of this case we need to look for an extent just
1707 * before the position we want in case there is outstanding delalloc
1708 * going on here.
1709 */
1710 if (origin == SEEK_HOLE && start != 0) {
1711 if (start <= root->sectorsize)
1712 em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
1713 root->sectorsize, 0);
1714 else
1715 em = btrfs_get_extent_fiemap(inode, NULL, 0,
1716 start - root->sectorsize,
1717 root->sectorsize, 0);
1718 if (IS_ERR(em)) {
1719 ret = -ENXIO;
1720 goto out;
1721 }
1722 last_end = em->start + em->len;
1723 if (em->block_start == EXTENT_MAP_DELALLOC)
1724 last_end = min_t(u64, last_end, inode->i_size);
1725 free_extent_map(em);
1726 }
1727
1728 while (1) {
1729 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
1730 if (IS_ERR(em)) {
1731 ret = -ENXIO;
1732 break;
1733 }
1734
1735 if (em->block_start == EXTENT_MAP_HOLE) {
1736 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
1737 if (last_end <= orig_start) {
1738 free_extent_map(em);
1739 ret = -ENXIO;
1740 break;
1741 }
1742 }
1743
1744 if (origin == SEEK_HOLE) {
1745 *offset = start;
1746 free_extent_map(em);
1747 break;
1748 }
1749 } else {
1750 if (origin == SEEK_DATA) {
1751 if (em->block_start == EXTENT_MAP_DELALLOC) {
1752 if (start >= inode->i_size) {
1753 free_extent_map(em);
1754 ret = -ENXIO;
1755 break;
1756 }
1757 }
1758
1759 *offset = start;
1760 free_extent_map(em);
1761 break;
1762 }
1763 }
1764
1765 start = em->start + em->len;
1766 last_end = em->start + em->len;
1767
1768 if (em->block_start == EXTENT_MAP_DELALLOC)
1769 last_end = min_t(u64, last_end, inode->i_size);
1770
1771 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
1772 free_extent_map(em);
1773 ret = -ENXIO;
1774 break;
1775 }
1776 free_extent_map(em);
1777 cond_resched();
1778 }
1779 if (!ret)
1780 *offset = min(*offset, inode->i_size);
1781out:
1782 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
1783 &cached_state, GFP_NOFS);
1784 return ret;
1785}
1786
1787static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
1788{
1789 struct inode *inode = file->f_mapping->host;
1790 int ret;
1791
1792 mutex_lock(&inode->i_mutex);
1793 switch (origin) {
1794 case SEEK_END:
1795 case SEEK_CUR:
1796 offset = generic_file_llseek_unlocked(file, offset, origin);
1797 goto out;
1798 case SEEK_DATA:
1799 case SEEK_HOLE:
1800 ret = find_desired_extent(inode, &offset, origin);
1801 if (ret) {
1802 mutex_unlock(&inode->i_mutex);
1803 return ret;
1804 }
1805 }
1806
1807 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
1808 return -EINVAL;
1809 if (offset > inode->i_sb->s_maxbytes)
1810 return -EINVAL;
1811
1812 /* Special lock needed here? */
1813 if (offset != file->f_pos) {
1814 file->f_pos = offset;
1815 file->f_version = 0;
1816 }
1817out:
1818 mutex_unlock(&inode->i_mutex);
1819 return offset;
1820}
1821
1667const struct file_operations btrfs_file_operations = { 1822const struct file_operations btrfs_file_operations = {
1668 .llseek = generic_file_llseek, 1823 .llseek = btrfs_file_llseek,
1669 .read = do_sync_read, 1824 .read = do_sync_read,
1670 .write = do_sync_write, 1825 .write = do_sync_write,
1671 .aio_read = generic_file_aio_read, 1826 .aio_read = generic_file_aio_read,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3601f0aebddf..2548a04a0230 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4079,13 +4079,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
4079static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 4079static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4080 struct nameidata *nd) 4080 struct nameidata *nd)
4081{ 4081{
4082 struct inode *inode; 4082 return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
4083
4084 inode = btrfs_lookup_dentry(dir, dentry);
4085 if (IS_ERR(inode))
4086 return ERR_CAST(inode);
4087
4088 return d_splice_alias(inode, dentry);
4089} 4083}
4090 4084
4091unsigned char btrfs_filetype_table[] = { 4085unsigned char btrfs_filetype_table[] = {
@@ -4772,11 +4766,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4772 if (err) { 4766 if (err) {
4773 drop_inode = 1; 4767 drop_inode = 1;
4774 } else { 4768 } else {
4775 struct dentry *parent = dget_parent(dentry); 4769 struct dentry *parent = dentry->d_parent;
4776 err = btrfs_update_inode(trans, root, inode); 4770 err = btrfs_update_inode(trans, root, inode);
4777 BUG_ON(err); 4771 BUG_ON(err);
4778 btrfs_log_new_name(trans, inode, NULL, parent); 4772 btrfs_log_new_name(trans, inode, NULL, parent);
4779 dput(parent);
4780 } 4773 }
4781 4774
4782 nr = trans->blocks_used; 4775 nr = trans->blocks_used;
@@ -6900,7 +6893,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
6900{ 6893{
6901 struct inode *inode = dentry->d_inode; 6894 struct inode *inode = dentry->d_inode;
6902 generic_fillattr(inode, stat); 6895 generic_fillattr(inode, stat);
6903 stat->dev = BTRFS_I(inode)->root->anon_super.s_dev; 6896 stat->dev = BTRFS_I(inode)->root->anon_dev;
6904 stat->blksize = PAGE_CACHE_SIZE; 6897 stat->blksize = PAGE_CACHE_SIZE;
6905 stat->blocks = (inode_get_bytes(inode) + 6898 stat->blocks = (inode_get_bytes(inode) +
6906 BTRFS_I(inode)->delalloc_bytes) >> 9; 6899 BTRFS_I(inode)->delalloc_bytes) >> 9;
@@ -7068,9 +7061,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7068 BUG_ON(ret); 7061 BUG_ON(ret);
7069 7062
7070 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { 7063 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7071 struct dentry *parent = dget_parent(new_dentry); 7064 struct dentry *parent = new_dentry->d_parent;
7072 btrfs_log_new_name(trans, old_inode, old_dir, parent); 7065 btrfs_log_new_name(trans, old_inode, old_dir, parent);
7073 dput(parent);
7074 btrfs_end_log_trans(root); 7066 btrfs_end_log_trans(root);
7075 } 7067 }
7076out_fail: 7068out_fail:
@@ -7331,7 +7323,7 @@ static int btrfs_set_page_dirty(struct page *page)
7331 return __set_page_dirty_nobuffers(page); 7323 return __set_page_dirty_nobuffers(page);
7332} 7324}
7333 7325
7334static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) 7326static int btrfs_permission(struct inode *inode, int mask)
7335{ 7327{
7336 struct btrfs_root *root = BTRFS_I(inode)->root; 7328 struct btrfs_root *root = BTRFS_I(inode)->root;
7337 7329
@@ -7339,7 +7331,7 @@ static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
7339 return -EROFS; 7331 return -EROFS;
7340 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7332 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
7341 return -EACCES; 7333 return -EACCES;
7342 return generic_permission(inode, mask, flags, btrfs_check_acl); 7334 return generic_permission(inode, mask);
7343} 7335}
7344 7336
7345static const struct inode_operations btrfs_dir_inode_operations = { 7337static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7359,10 +7351,12 @@ static const struct inode_operations btrfs_dir_inode_operations = {
7359 .listxattr = btrfs_listxattr, 7351 .listxattr = btrfs_listxattr,
7360 .removexattr = btrfs_removexattr, 7352 .removexattr = btrfs_removexattr,
7361 .permission = btrfs_permission, 7353 .permission = btrfs_permission,
7354 .check_acl = btrfs_check_acl,
7362}; 7355};
7363static const struct inode_operations btrfs_dir_ro_inode_operations = { 7356static const struct inode_operations btrfs_dir_ro_inode_operations = {
7364 .lookup = btrfs_lookup, 7357 .lookup = btrfs_lookup,
7365 .permission = btrfs_permission, 7358 .permission = btrfs_permission,
7359 .check_acl = btrfs_check_acl,
7366}; 7360};
7367 7361
7368static const struct file_operations btrfs_dir_file_operations = { 7362static const struct file_operations btrfs_dir_file_operations = {
@@ -7431,6 +7425,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
7431 .removexattr = btrfs_removexattr, 7425 .removexattr = btrfs_removexattr,
7432 .permission = btrfs_permission, 7426 .permission = btrfs_permission,
7433 .fiemap = btrfs_fiemap, 7427 .fiemap = btrfs_fiemap,
7428 .check_acl = btrfs_check_acl,
7434}; 7429};
7435static const struct inode_operations btrfs_special_inode_operations = { 7430static const struct inode_operations btrfs_special_inode_operations = {
7436 .getattr = btrfs_getattr, 7431 .getattr = btrfs_getattr,
@@ -7440,6 +7435,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
7440 .getxattr = btrfs_getxattr, 7435 .getxattr = btrfs_getxattr,
7441 .listxattr = btrfs_listxattr, 7436 .listxattr = btrfs_listxattr,
7442 .removexattr = btrfs_removexattr, 7437 .removexattr = btrfs_removexattr,
7438 .check_acl = btrfs_check_acl,
7443}; 7439};
7444static const struct inode_operations btrfs_symlink_inode_operations = { 7440static const struct inode_operations btrfs_symlink_inode_operations = {
7445 .readlink = generic_readlink, 7441 .readlink = generic_readlink,
@@ -7451,6 +7447,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7451 .getxattr = btrfs_getxattr, 7447 .getxattr = btrfs_getxattr,
7452 .listxattr = btrfs_listxattr, 7448 .listxattr = btrfs_listxattr,
7453 .removexattr = btrfs_removexattr, 7449 .removexattr = btrfs_removexattr,
7450 .check_acl = btrfs_check_acl,
7454}; 7451};
7455 7452
7456const struct dentry_operations btrfs_dentry_operations = { 7453const struct dentry_operations btrfs_dentry_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a3c4751e07db..622543309eb2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -323,7 +323,7 @@ static noinline int create_subvol(struct btrfs_root *root,
323 struct btrfs_inode_item *inode_item; 323 struct btrfs_inode_item *inode_item;
324 struct extent_buffer *leaf; 324 struct extent_buffer *leaf;
325 struct btrfs_root *new_root; 325 struct btrfs_root *new_root;
326 struct dentry *parent = dget_parent(dentry); 326 struct dentry *parent = dentry->d_parent;
327 struct inode *dir; 327 struct inode *dir;
328 int ret; 328 int ret;
329 int err; 329 int err;
@@ -332,10 +332,8 @@ static noinline int create_subvol(struct btrfs_root *root,
332 u64 index = 0; 332 u64 index = 0;
333 333
334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); 334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
335 if (ret) { 335 if (ret)
336 dput(parent);
337 return ret; 336 return ret;
338 }
339 337
340 dir = parent->d_inode; 338 dir = parent->d_inode;
341 339
@@ -346,10 +344,8 @@ static noinline int create_subvol(struct btrfs_root *root,
346 * 2 - dir items 344 * 2 - dir items
347 */ 345 */
348 trans = btrfs_start_transaction(root, 6); 346 trans = btrfs_start_transaction(root, 6);
349 if (IS_ERR(trans)) { 347 if (IS_ERR(trans))
350 dput(parent);
351 return PTR_ERR(trans); 348 return PTR_ERR(trans);
352 }
353 349
354 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 350 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
355 0, objectid, NULL, 0, 0, 0); 351 0, objectid, NULL, 0, 0, 0);
@@ -439,7 +435,6 @@ static noinline int create_subvol(struct btrfs_root *root,
439 435
440 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 436 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
441fail: 437fail:
442 dput(parent);
443 if (async_transid) { 438 if (async_transid) {
444 *async_transid = trans->transid; 439 *async_transid = trans->transid;
445 err = btrfs_commit_transaction_async(trans, root, 1); 440 err = btrfs_commit_transaction_async(trans, root, 1);
@@ -456,7 +451,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
456 bool readonly) 451 bool readonly)
457{ 452{
458 struct inode *inode; 453 struct inode *inode;
459 struct dentry *parent;
460 struct btrfs_pending_snapshot *pending_snapshot; 454 struct btrfs_pending_snapshot *pending_snapshot;
461 struct btrfs_trans_handle *trans; 455 struct btrfs_trans_handle *trans;
462 int ret; 456 int ret;
@@ -504,9 +498,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
504 if (ret) 498 if (ret)
505 goto fail; 499 goto fail;
506 500
507 parent = dget_parent(dentry); 501 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
508 inode = btrfs_lookup_dentry(parent->d_inode, dentry);
509 dput(parent);
510 if (IS_ERR(inode)) { 502 if (IS_ERR(inode)) {
511 ret = PTR_ERR(inode); 503 ret = PTR_ERR(inode);
512 goto fail; 504 goto fail;
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index a2603e7c0bb5..622f4696e484 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -129,8 +129,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
129 !root->d_inode->i_op->mkdir || 129 !root->d_inode->i_op->mkdir ||
130 !root->d_inode->i_op->setxattr || 130 !root->d_inode->i_op->setxattr ||
131 !root->d_inode->i_op->getxattr || 131 !root->d_inode->i_op->getxattr ||
132 !root->d_sb ||
133 !root->d_sb->s_op ||
134 !root->d_sb->s_op->statfs || 132 !root->d_sb->s_op->statfs ||
135 !root->d_sb->s_op->sync_fs) 133 !root->d_sb->s_op->sync_fs)
136 goto error_unsupported; 134 goto error_unsupported;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f605753c8fe9..8d74ad7ba556 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1811,7 +1811,7 @@ out:
1811 spin_unlock(&ci->i_unsafe_lock); 1811 spin_unlock(&ci->i_unsafe_lock);
1812} 1812}
1813 1813
1814int ceph_fsync(struct file *file, int datasync) 1814int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1815{ 1815{
1816 struct inode *inode = file->f_mapping->host; 1816 struct inode *inode = file->f_mapping->host;
1817 struct ceph_inode_info *ci = ceph_inode(inode); 1817 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1822,9 +1822,10 @@ int ceph_fsync(struct file *file, int datasync)
1822 dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); 1822 dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
1823 sync_write_wait(inode); 1823 sync_write_wait(inode);
1824 1824
1825 ret = filemap_write_and_wait(inode->i_mapping); 1825 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1826 if (ret < 0) 1826 if (ret < 0)
1827 return ret; 1827 return ret;
1828 mutex_lock(&inode->i_mutex);
1828 1829
1829 dirty = try_flush_caps(inode, NULL, &flush_tid); 1830 dirty = try_flush_caps(inode, NULL, &flush_tid);
1830 dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); 1831 dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -1841,6 +1842,7 @@ int ceph_fsync(struct file *file, int datasync)
1841 } 1842 }
1842 1843
1843 dout("fsync %p%s done\n", inode, datasync ? " datasync" : ""); 1844 dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
1845 mutex_unlock(&inode->i_mutex);
1844 return ret; 1846 return ret;
1845} 1847}
1846 1848
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ef8f08c343e8..1065ac779840 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -252,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
252 off = 1; 252 off = 1;
253 } 253 }
254 if (filp->f_pos == 1) { 254 if (filp->f_pos == 1) {
255 ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino; 255 ino_t ino = parent_ino(filp->f_dentry);
256 dout("readdir off 1 -> '..'\n"); 256 dout("readdir off 1 -> '..'\n");
257 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), 257 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
258 ceph_translate_ino(inode->i_sb, ino), 258 ceph_translate_ino(inode->i_sb, ino),
@@ -446,14 +446,19 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
446 loff_t retval; 446 loff_t retval;
447 447
448 mutex_lock(&inode->i_mutex); 448 mutex_lock(&inode->i_mutex);
449 retval = -EINVAL;
449 switch (origin) { 450 switch (origin) {
450 case SEEK_END: 451 case SEEK_END:
451 offset += inode->i_size + 2; /* FIXME */ 452 offset += inode->i_size + 2; /* FIXME */
452 break; 453 break;
453 case SEEK_CUR: 454 case SEEK_CUR:
454 offset += file->f_pos; 455 offset += file->f_pos;
456 case SEEK_SET:
457 break;
458 default:
459 goto out;
455 } 460 }
456 retval = -EINVAL; 461
457 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 462 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
458 if (offset != file->f_pos) { 463 if (offset != file->f_pos) {
459 file->f_pos = offset; 464 file->f_pos = offset;
@@ -477,6 +482,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
477 if (offset > old_offset) 482 if (offset > old_offset)
478 fi->dir_release_count--; 483 fi->dir_release_count--;
479 } 484 }
485out:
480 mutex_unlock(&inode->i_mutex); 486 mutex_unlock(&inode->i_mutex);
481 return retval; 487 return retval;
482} 488}
@@ -566,7 +572,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
566 /* open (but not create!) intent? */ 572 /* open (but not create!) intent? */
567 if (nd && 573 if (nd &&
568 (nd->flags & LOOKUP_OPEN) && 574 (nd->flags & LOOKUP_OPEN) &&
569 (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
570 !(nd->intent.open.flags & O_CREAT)) { 575 !(nd->intent.open.flags & O_CREAT)) {
571 int mode = nd->intent.open.create_mode & ~current->fs->umask; 576 int mode = nd->intent.open.create_mode & ~current->fs->umask;
572 return ceph_lookup_open(dir, dentry, nd, mode, 1); 577 return ceph_lookup_open(dir, dentry, nd, mode, 1);
@@ -1113,7 +1118,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1113 * an fsync() on a dir will wait for any uncommitted directory 1118 * an fsync() on a dir will wait for any uncommitted directory
1114 * operations to commit. 1119 * operations to commit.
1115 */ 1120 */
1116static int ceph_dir_fsync(struct file *file, int datasync) 1121static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1122 int datasync)
1117{ 1123{
1118 struct inode *inode = file->f_path.dentry->d_inode; 1124 struct inode *inode = file->f_path.dentry->d_inode;
1119 struct ceph_inode_info *ci = ceph_inode(inode); 1125 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1123,6 +1129,11 @@ static int ceph_dir_fsync(struct file *file, int datasync)
1123 int ret = 0; 1129 int ret = 0;
1124 1130
1125 dout("dir_fsync %p\n", inode); 1131 dout("dir_fsync %p\n", inode);
1132 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1133 if (ret)
1134 return ret;
1135 mutex_lock(&inode->i_mutex);
1136
1126 spin_lock(&ci->i_unsafe_lock); 1137 spin_lock(&ci->i_unsafe_lock);
1127 if (list_empty(head)) 1138 if (list_empty(head))
1128 goto out; 1139 goto out;
@@ -1156,6 +1167,8 @@ static int ceph_dir_fsync(struct file *file, int datasync)
1156 } while (req->r_tid < last_tid); 1167 } while (req->r_tid < last_tid);
1157out: 1168out:
1158 spin_unlock(&ci->i_unsafe_lock); 1169 spin_unlock(&ci->i_unsafe_lock);
1170 mutex_unlock(&inode->i_mutex);
1171
1159 return ret; 1172 return ret;
1160} 1173}
1161 1174
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4698a5c553dc..0d0eae05598f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -226,7 +226,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
226 struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); 226 struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
227 struct ceph_mds_request *req; 227 struct ceph_mds_request *req;
228 int err; 228 int err;
229 int flags = nd->intent.open.flags - 1; /* silly vfs! */ 229 int flags = nd->intent.open.flags;
230 230
231 dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", 231 dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
232 dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); 232 dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -768,13 +768,16 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
768 768
769 mutex_lock(&inode->i_mutex); 769 mutex_lock(&inode->i_mutex);
770 __ceph_do_pending_vmtruncate(inode); 770 __ceph_do_pending_vmtruncate(inode);
771 switch (origin) { 771 if (origin != SEEK_CUR || origin != SEEK_SET) {
772 case SEEK_END:
773 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 772 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
774 if (ret < 0) { 773 if (ret < 0) {
775 offset = ret; 774 offset = ret;
776 goto out; 775 goto out;
777 } 776 }
777 }
778
779 switch (origin) {
780 case SEEK_END:
778 offset += inode->i_size; 781 offset += inode->i_size;
779 break; 782 break;
780 case SEEK_CUR: 783 case SEEK_CUR:
@@ -790,6 +793,19 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
790 } 793 }
791 offset += file->f_pos; 794 offset += file->f_pos;
792 break; 795 break;
796 case SEEK_DATA:
797 if (offset >= inode->i_size) {
798 ret = -ENXIO;
799 goto out;
800 }
801 break;
802 case SEEK_HOLE:
803 if (offset >= inode->i_size) {
804 ret = -ENXIO;
805 goto out;
806 }
807 offset = inode->i_size;
808 break;
793 } 809 }
794 810
795 if (offset < 0 || offset > inode->i_sb->s_maxbytes) { 811 if (offset < 0 || offset > inode->i_sb->s_maxbytes) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d8858e96ab18..dfb2831d8d85 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1795,17 +1795,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
1795 * Check inode permissions. We verify we have a valid value for 1795 * Check inode permissions. We verify we have a valid value for
1796 * the AUTH cap, then call the generic handler. 1796 * the AUTH cap, then call the generic handler.
1797 */ 1797 */
1798int ceph_permission(struct inode *inode, int mask, unsigned int flags) 1798int ceph_permission(struct inode *inode, int mask)
1799{ 1799{
1800 int err; 1800 int err;
1801 1801
1802 if (flags & IPERM_FLAG_RCU) 1802 if (mask & MAY_NOT_BLOCK)
1803 return -ECHILD; 1803 return -ECHILD;
1804 1804
1805 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1805 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
1806 1806
1807 if (!err) 1807 if (!err)
1808 err = generic_permission(inode, mask, flags, NULL); 1808 err = generic_permission(inode, mask);
1809 return err; 1809 return err;
1810} 1810}
1811 1811
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f5cabefa98dc..30446b144e3d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -692,7 +692,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
692extern void ceph_queue_writeback(struct inode *inode); 692extern void ceph_queue_writeback(struct inode *inode);
693 693
694extern int ceph_do_getattr(struct inode *inode, int mask); 694extern int ceph_do_getattr(struct inode *inode, int mask);
695extern int ceph_permission(struct inode *inode, int mask, unsigned int flags); 695extern int ceph_permission(struct inode *inode, int mask);
696extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 696extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
697extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 697extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
698 struct kstat *stat); 698 struct kstat *stat);
@@ -728,7 +728,8 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc,
728 728
729extern void ceph_queue_caps_release(struct inode *inode); 729extern void ceph_queue_caps_release(struct inode *inode);
730extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); 730extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
731extern int ceph_fsync(struct file *file, int datasync); 731extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
732 int datasync);
732extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, 733extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
733 struct ceph_mds_session *session); 734 struct ceph_mds_session *session);
734extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, 735extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index bc4b12ca537b..865517470967 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -224,7 +224,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
224 return 0; 224 return 0;
225} 225}
226 226
227static int cifs_permission(struct inode *inode, int mask, unsigned int flags) 227static int cifs_permission(struct inode *inode, int mask)
228{ 228{
229 struct cifs_sb_info *cifs_sb; 229 struct cifs_sb_info *cifs_sb;
230 230
@@ -239,7 +239,7 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
239 on the client (above and beyond ACL on servers) for 239 on the client (above and beyond ACL on servers) for
240 servers which do not support setting and viewing mode bits, 240 servers which do not support setting and viewing mode bits,
241 so allowing client to check permissions is useful */ 241 so allowing client to check permissions is useful */
242 return generic_permission(inode, mask, flags, NULL); 242 return generic_permission(inode, mask);
243} 243}
244 244
245static struct kmem_cache *cifs_inode_cachep; 245static struct kmem_cache *cifs_inode_cachep;
@@ -704,8 +704,11 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
704 704
705static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) 705static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
706{ 706{
707 /* origin == SEEK_END => we must revalidate the cached file length */ 707 /*
708 if (origin == SEEK_END) { 708 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
709 * the cached file length
710 */
711 if (origin != SEEK_SET || origin != SEEK_CUR) {
709 int rc; 712 int rc;
710 struct inode *inode = file->f_path.dentry->d_inode; 713 struct inode *inode = file->f_path.dentry->d_inode;
711 714
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 036ca83e5f46..fbd050c8d52a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -91,8 +91,8 @@ extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
92 unsigned long nr_segs, loff_t pos); 92 unsigned long nr_segs, loff_t pos);
93extern int cifs_lock(struct file *, int, struct file_lock *); 93extern int cifs_lock(struct file *, int, struct file_lock *);
94extern int cifs_fsync(struct file *, int); 94extern int cifs_fsync(struct file *, loff_t, loff_t, int);
95extern int cifs_strict_fsync(struct file *, int); 95extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
96extern int cifs_flush(struct file *, fl_owner_t id); 96extern int cifs_flush(struct file *, fl_owner_t id);
97extern int cifs_file_mmap(struct file * , struct vm_area_struct *); 97extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
98extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); 98extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ccc1afa0bf3b..e66297bad412 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -320,9 +320,10 @@ requeue_echo:
320} 320}
321 321
322static int 322static int
323cifs_demultiplex_thread(struct TCP_Server_Info *server) 323cifs_demultiplex_thread(void *p)
324{ 324{
325 int length; 325 int length;
326 struct TCP_Server_Info *server = p;
326 unsigned int pdu_length, total_read; 327 unsigned int pdu_length, total_read;
327 struct smb_hdr *smb_buffer = NULL; 328 struct smb_hdr *smb_buffer = NULL;
328 struct smb_hdr *bigbuf = NULL; 329 struct smb_hdr *bigbuf = NULL;
@@ -1791,7 +1792,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1791 * this will succeed. No need for try_module_get(). 1792 * this will succeed. No need for try_module_get().
1792 */ 1793 */
1793 __module_get(THIS_MODULE); 1794 __module_get(THIS_MODULE);
1794 tcp_ses->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread, 1795 tcp_ses->tsk = kthread_run(cifs_demultiplex_thread,
1795 tcp_ses, "cifsd"); 1796 tcp_ses, "cifsd");
1796 if (IS_ERR(tcp_ses->tsk)) { 1797 if (IS_ERR(tcp_ses->tsk)) {
1797 rc = PTR_ERR(tcp_ses->tsk); 1798 rc = PTR_ERR(tcp_ses->tsk);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fa8c21d913bc..14d602f178c2 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -179,7 +179,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
179 if (oplockEnabled) 179 if (oplockEnabled)
180 oplock = REQ_OPLOCK; 180 oplock = REQ_OPLOCK;
181 181
182 if (nd && (nd->flags & LOOKUP_OPEN)) 182 if (nd)
183 oflags = nd->intent.open.file->f_flags; 183 oflags = nd->intent.open.file->f_flags;
184 else 184 else
185 oflags = O_RDONLY | O_CREAT; 185 oflags = O_RDONLY | O_CREAT;
@@ -214,7 +214,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
214 which should be rare for path not covered on files) */ 214 which should be rare for path not covered on files) */
215 } 215 }
216 216
217 if (nd && (nd->flags & LOOKUP_OPEN)) { 217 if (nd) {
218 /* if the file is going to stay open, then we 218 /* if the file is going to stay open, then we
219 need to set the desired access properly */ 219 need to set the desired access properly */
220 desiredAccess = 0; 220 desiredAccess = 0;
@@ -328,7 +328,7 @@ cifs_create_set_dentry:
328 else 328 else
329 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); 329 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
330 330
331 if (newinode && nd && (nd->flags & LOOKUP_OPEN)) { 331 if (newinode && nd) {
332 struct cifsFileInfo *pfile_info; 332 struct cifsFileInfo *pfile_info;
333 struct file *filp; 333 struct file *filp;
334 334
@@ -568,7 +568,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
568 * reduction in network traffic in the other paths. 568 * reduction in network traffic in the other paths.
569 */ 569 */
570 if (pTcon->unix_ext) { 570 if (pTcon->unix_ext) {
571 if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && 571 if (nd && !(nd->flags & LOOKUP_DIRECTORY) &&
572 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && 572 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
573 (nd->intent.open.file->f_flags & O_CREAT)) { 573 (nd->intent.open.file->f_flags & O_CREAT)) {
574 rc = cifs_posix_open(full_path, &newInode, 574 rc = cifs_posix_open(full_path, &newInode,
@@ -663,10 +663,8 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
663 * case sensitive name which is specified by user if this is 663 * case sensitive name which is specified by user if this is
664 * for creation. 664 * for creation.
665 */ 665 */
666 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 666 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
667 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 667 return 0;
668 return 0;
669 }
670 668
671 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) 669 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
672 return 0; 670 return 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a9b4a24f2a16..378acdafa356 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1401,7 +1401,8 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
1401 return rc; 1401 return rc;
1402} 1402}
1403 1403
1404int cifs_strict_fsync(struct file *file, int datasync) 1404int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
1405 int datasync)
1405{ 1406{
1406 int xid; 1407 int xid;
1407 int rc = 0; 1408 int rc = 0;
@@ -1410,6 +1411,11 @@ int cifs_strict_fsync(struct file *file, int datasync)
1410 struct inode *inode = file->f_path.dentry->d_inode; 1411 struct inode *inode = file->f_path.dentry->d_inode;
1411 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1412 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1412 1413
1414 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1415 if (rc)
1416 return rc;
1417 mutex_lock(&inode->i_mutex);
1418
1413 xid = GetXid(); 1419 xid = GetXid();
1414 1420
1415 cFYI(1, "Sync file - name: %s datasync: 0x%x", 1421 cFYI(1, "Sync file - name: %s datasync: 0x%x",
@@ -1428,16 +1434,23 @@ int cifs_strict_fsync(struct file *file, int datasync)
1428 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 1434 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1429 1435
1430 FreeXid(xid); 1436 FreeXid(xid);
1437 mutex_unlock(&inode->i_mutex);
1431 return rc; 1438 return rc;
1432} 1439}
1433 1440
1434int cifs_fsync(struct file *file, int datasync) 1441int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1435{ 1442{
1436 int xid; 1443 int xid;
1437 int rc = 0; 1444 int rc = 0;
1438 struct cifs_tcon *tcon; 1445 struct cifs_tcon *tcon;
1439 struct cifsFileInfo *smbfile = file->private_data; 1446 struct cifsFileInfo *smbfile = file->private_data;
1440 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1447 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1448 struct inode *inode = file->f_mapping->host;
1449
1450 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1451 if (rc)
1452 return rc;
1453 mutex_lock(&inode->i_mutex);
1441 1454
1442 xid = GetXid(); 1455 xid = GetXid();
1443 1456
@@ -1449,6 +1462,7 @@ int cifs_fsync(struct file *file, int datasync)
1449 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 1462 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1450 1463
1451 FreeXid(xid); 1464 FreeXid(xid);
1465 mutex_unlock(&inode->i_mutex);
1452 return rc; 1466 return rc;
1453} 1467}
1454 1468
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 6751e745bbc6..965a3af186a1 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -796,7 +796,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
796 file->f_pos++; 796 file->f_pos++;
797 case 1: 797 case 1:
798 if (filldir(direntry, "..", 2, file->f_pos, 798 if (filldir(direntry, "..", 2, file->f_pos,
799 file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 799 parent_ino(file->f_path.dentry), DT_DIR) < 0) {
800 cERROR(1, "Filldir for parent dir failed"); 800 cERROR(1, "Filldir for parent dir failed");
801 rc = -ENOMEM; 801 rc = -ENOMEM;
802 break; 802 break;
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index 6b443ff43a19..b7143cf783ac 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -11,7 +11,7 @@ extern int coda_fake_statfs;
11 11
12void coda_destroy_inodecache(void); 12void coda_destroy_inodecache(void);
13int coda_init_inodecache(void); 13int coda_init_inodecache(void);
14int coda_fsync(struct file *coda_file, int datasync); 14int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync);
15void coda_sysctl_init(void); 15void coda_sysctl_init(void);
16void coda_sysctl_clean(void); 16void coda_sysctl_clean(void);
17 17
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 9b0c5323890b..44e17e9c21ae 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -39,7 +39,7 @@ extern const struct file_operations coda_ioctl_operations;
39/* operations shared over more than one file */ 39/* operations shared over more than one file */
40int coda_open(struct inode *i, struct file *f); 40int coda_open(struct inode *i, struct file *f);
41int coda_release(struct inode *i, struct file *f); 41int coda_release(struct inode *i, struct file *f);
42int coda_permission(struct inode *inode, int mask, unsigned int flags); 42int coda_permission(struct inode *inode, int mask);
43int coda_revalidate_inode(struct dentry *); 43int coda_revalidate_inode(struct dentry *);
44int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); 44int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
45int coda_setattr(struct dentry *, struct iattr *); 45int coda_setattr(struct dentry *, struct iattr *);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2b8dae4d121e..0239433f50cb 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -132,11 +132,11 @@ exit:
132} 132}
133 133
134 134
135int coda_permission(struct inode *inode, int mask, unsigned int flags) 135int coda_permission(struct inode *inode, int mask)
136{ 136{
137 int error; 137 int error;
138 138
139 if (flags & IPERM_FLAG_RCU) 139 if (mask & MAY_NOT_BLOCK)
140 return -ECHILD; 140 return -ECHILD;
141 141
142 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 142 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -449,8 +449,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
449 struct file *host_file; 449 struct file *host_file;
450 struct dentry *de; 450 struct dentry *de;
451 struct venus_dirent *vdir; 451 struct venus_dirent *vdir;
452 unsigned long vdir_size = 452 unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
453 (unsigned long)(&((struct venus_dirent *)0)->d_name);
454 unsigned int type; 453 unsigned int type;
455 struct qstr name; 454 struct qstr name;
456 ino_t ino; 455 ino_t ino;
@@ -474,7 +473,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
474 coda_file->f_pos++; 473 coda_file->f_pos++;
475 } 474 }
476 if (coda_file->f_pos == 1) { 475 if (coda_file->f_pos == 1) {
477 ret = filldir(buf, "..", 2, 1, de->d_parent->d_inode->i_ino, DT_DIR); 476 ret = filldir(buf, "..", 2, 1, parent_ino(de), DT_DIR);
478 if (ret < 0) 477 if (ret < 0)
479 goto out; 478 goto out;
480 result++; 479 result++;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 0433057be330..8edd404e6419 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -199,7 +199,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
199 return 0; 199 return 0;
200} 200}
201 201
202int coda_fsync(struct file *coda_file, int datasync) 202int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
203{ 203{
204 struct file *host_file; 204 struct file *host_file;
205 struct inode *coda_inode = coda_file->f_path.dentry->d_inode; 205 struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
@@ -210,6 +210,11 @@ int coda_fsync(struct file *coda_file, int datasync)
210 S_ISLNK(coda_inode->i_mode))) 210 S_ISLNK(coda_inode->i_mode)))
211 return -EINVAL; 211 return -EINVAL;
212 212
213 err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
214 if (err)
215 return err;
216 mutex_lock(&coda_inode->i_mutex);
217
213 cfi = CODA_FTOC(coda_file); 218 cfi = CODA_FTOC(coda_file);
214 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 219 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
215 host_file = cfi->cfi_container; 220 host_file = cfi->cfi_container;
@@ -217,6 +222,7 @@ int coda_fsync(struct file *coda_file, int datasync)
217 err = vfs_fsync(host_file, datasync); 222 err = vfs_fsync(host_file, datasync);
218 if (!err && !datasync) 223 if (!err && !datasync)
219 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); 224 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
225 mutex_unlock(&coda_inode->i_mutex);
220 226
221 return err; 227 return err;
222} 228}
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index cb140ef293e4..ee0981f1375b 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
24#include "coda_linux.h" 24#include "coda_linux.h"
25 25
26/* pioctl ops */ 26/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags); 27static int coda_ioctl_permission(struct inode *inode, int mask);
28static long coda_pioctl(struct file *filp, unsigned int cmd, 28static long coda_pioctl(struct file *filp, unsigned int cmd,
29 unsigned long user_data); 29 unsigned long user_data);
30 30
@@ -41,7 +41,7 @@ const struct file_operations coda_ioctl_operations = {
41}; 41};
42 42
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) 44static int coda_ioctl_permission(struct inode *inode, int mask)
45{ 45{
46 return (mask & MAY_EXEC) ? -EACCES : 0; 46 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 47}
diff --git a/fs/dcache.c b/fs/dcache.c
index fbdcbca40725..be18598c7fd7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -344,6 +344,24 @@ void d_drop(struct dentry *dentry)
344EXPORT_SYMBOL(d_drop); 344EXPORT_SYMBOL(d_drop);
345 345
346/* 346/*
347 * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
348 * @dentry: dentry to drop
349 *
350 * This is called when we do a lookup on a placeholder dentry that needed to be
351 * looked up. The dentry should have been hashed in order for it to be found by
352 * the lookup code, but now needs to be unhashed while we do the actual lookup
353 * and clear the DCACHE_NEED_LOOKUP flag.
354 */
355void d_clear_need_lookup(struct dentry *dentry)
356{
357 spin_lock(&dentry->d_lock);
358 __d_drop(dentry);
359 dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
360 spin_unlock(&dentry->d_lock);
361}
362EXPORT_SYMBOL(d_clear_need_lookup);
363
364/*
347 * Finish off a dentry we've decided to kill. 365 * Finish off a dentry we've decided to kill.
348 * dentry->d_lock must be held, returns with it unlocked. 366 * dentry->d_lock must be held, returns with it unlocked.
349 * If ref is non-zero, then decrement the refcount too. 367 * If ref is non-zero, then decrement the refcount too.
@@ -432,8 +450,13 @@ repeat:
432 if (d_unhashed(dentry)) 450 if (d_unhashed(dentry))
433 goto kill_it; 451 goto kill_it;
434 452
435 /* Otherwise leave it cached and ensure it's on the LRU */ 453 /*
436 dentry->d_flags |= DCACHE_REFERENCED; 454 * If this dentry needs lookup, don't set the referenced flag so that it
455 * is more likely to be cleaned up by the dcache shrinker in case of
456 * memory pressure.
457 */
458 if (!d_need_lookup(dentry))
459 dentry->d_flags |= DCACHE_REFERENCED;
437 dentry_lru_add(dentry); 460 dentry_lru_add(dentry);
438 461
439 dentry->d_count--; 462 dentry->d_count--;
@@ -526,10 +549,6 @@ repeat:
526 */ 549 */
527 rcu_read_lock(); 550 rcu_read_lock();
528 ret = dentry->d_parent; 551 ret = dentry->d_parent;
529 if (!ret) {
530 rcu_read_unlock();
531 goto out;
532 }
533 spin_lock(&ret->d_lock); 552 spin_lock(&ret->d_lock);
534 if (unlikely(ret != dentry->d_parent)) { 553 if (unlikely(ret != dentry->d_parent)) {
535 spin_unlock(&ret->d_lock); 554 spin_unlock(&ret->d_lock);
@@ -540,7 +559,6 @@ repeat:
540 BUG_ON(!ret->d_count); 559 BUG_ON(!ret->d_count);
541 ret->d_count++; 560 ret->d_count++;
542 spin_unlock(&ret->d_lock); 561 spin_unlock(&ret->d_lock);
543out:
544 return ret; 562 return ret;
545} 563}
546EXPORT_SYMBOL(dget_parent); 564EXPORT_SYMBOL(dget_parent);
@@ -720,13 +738,11 @@ static void shrink_dentry_list(struct list_head *list)
720 * 738 *
721 * If flags contains DCACHE_REFERENCED referenced dentries will not be pruned. 739 * If flags contains DCACHE_REFERENCED referenced dentries will not be pruned.
722 */ 740 */
723static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) 741static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
724{ 742{
725 /* called from prune_dcache() and shrink_dcache_parent() */
726 struct dentry *dentry; 743 struct dentry *dentry;
727 LIST_HEAD(referenced); 744 LIST_HEAD(referenced);
728 LIST_HEAD(tmp); 745 LIST_HEAD(tmp);
729 int cnt = *count;
730 746
731relock: 747relock:
732 spin_lock(&dcache_lru_lock); 748 spin_lock(&dcache_lru_lock);
@@ -754,7 +770,7 @@ relock:
754 } else { 770 } else {
755 list_move_tail(&dentry->d_lru, &tmp); 771 list_move_tail(&dentry->d_lru, &tmp);
756 spin_unlock(&dentry->d_lock); 772 spin_unlock(&dentry->d_lock);
757 if (!--cnt) 773 if (!--count)
758 break; 774 break;
759 } 775 }
760 cond_resched_lock(&dcache_lru_lock); 776 cond_resched_lock(&dcache_lru_lock);
@@ -764,83 +780,22 @@ relock:
764 spin_unlock(&dcache_lru_lock); 780 spin_unlock(&dcache_lru_lock);
765 781
766 shrink_dentry_list(&tmp); 782 shrink_dentry_list(&tmp);
767
768 *count = cnt;
769} 783}
770 784
771/** 785/**
772 * prune_dcache - shrink the dcache 786 * prune_dcache_sb - shrink the dcache
773 * @count: number of entries to try to free 787 * @nr_to_scan: number of entries to try to free
774 * 788 *
775 * Shrink the dcache. This is done when we need more memory, or simply when we 789 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
776 * need to unmount something (at which point we need to unuse all dentries). 790 * done when we need more memory and called from the superblock shrinker
791 * function.
777 * 792 *
778 * This function may fail to free any resources if all the dentries are in use. 793 * This function may fail to free any resources if all the dentries are in
794 * use.
779 */ 795 */
780static void prune_dcache(int count) 796void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
781{ 797{
782 struct super_block *sb, *p = NULL; 798 __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
783 int w_count;
784 int unused = dentry_stat.nr_unused;
785 int prune_ratio;
786 int pruned;
787
788 if (unused == 0 || count == 0)
789 return;
790 if (count >= unused)
791 prune_ratio = 1;
792 else
793 prune_ratio = unused / count;
794 spin_lock(&sb_lock);
795 list_for_each_entry(sb, &super_blocks, s_list) {
796 if (list_empty(&sb->s_instances))
797 continue;
798 if (sb->s_nr_dentry_unused == 0)
799 continue;
800 sb->s_count++;
801 /* Now, we reclaim unused dentrins with fairness.
802 * We reclaim them same percentage from each superblock.
803 * We calculate number of dentries to scan on this sb
804 * as follows, but the implementation is arranged to avoid
805 * overflows:
806 * number of dentries to scan on this sb =
807 * count * (number of dentries on this sb /
808 * number of dentries in the machine)
809 */
810 spin_unlock(&sb_lock);
811 if (prune_ratio != 1)
812 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
813 else
814 w_count = sb->s_nr_dentry_unused;
815 pruned = w_count;
816 /*
817 * We need to be sure this filesystem isn't being unmounted,
818 * otherwise we could race with generic_shutdown_super(), and
819 * end up holding a reference to an inode while the filesystem
820 * is unmounted. So we try to get s_umount, and make sure
821 * s_root isn't NULL.
822 */
823 if (down_read_trylock(&sb->s_umount)) {
824 if ((sb->s_root != NULL) &&
825 (!list_empty(&sb->s_dentry_lru))) {
826 __shrink_dcache_sb(sb, &w_count,
827 DCACHE_REFERENCED);
828 pruned -= w_count;
829 }
830 up_read(&sb->s_umount);
831 }
832 spin_lock(&sb_lock);
833 if (p)
834 __put_super(p);
835 count -= pruned;
836 p = sb;
837 /* more work left to do? */
838 if (count <= 0)
839 break;
840 }
841 if (p)
842 __put_super(p);
843 spin_unlock(&sb_lock);
844} 799}
845 800
846/** 801/**
@@ -1215,45 +1170,13 @@ void shrink_dcache_parent(struct dentry * parent)
1215 int found; 1170 int found;
1216 1171
1217 while ((found = select_parent(parent)) != 0) 1172 while ((found = select_parent(parent)) != 0)
1218 __shrink_dcache_sb(sb, &found, 0); 1173 __shrink_dcache_sb(sb, found, 0);
1219} 1174}
1220EXPORT_SYMBOL(shrink_dcache_parent); 1175EXPORT_SYMBOL(shrink_dcache_parent);
1221 1176
1222/*
1223 * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
1224 *
1225 * We need to avoid reentering the filesystem if the caller is performing a
1226 * GFP_NOFS allocation attempt. One example deadlock is:
1227 *
1228 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
1229 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
1230 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
1231 *
1232 * In this case we return -1 to tell the caller that we baled.
1233 */
1234static int shrink_dcache_memory(struct shrinker *shrink,
1235 struct shrink_control *sc)
1236{
1237 int nr = sc->nr_to_scan;
1238 gfp_t gfp_mask = sc->gfp_mask;
1239
1240 if (nr) {
1241 if (!(gfp_mask & __GFP_FS))
1242 return -1;
1243 prune_dcache(nr);
1244 }
1245
1246 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
1247}
1248
1249static struct shrinker dcache_shrinker = {
1250 .shrink = shrink_dcache_memory,
1251 .seeks = DEFAULT_SEEKS,
1252};
1253
1254/** 1177/**
1255 * d_alloc - allocate a dcache entry 1178 * __d_alloc - allocate a dcache entry
1256 * @parent: parent of entry to allocate 1179 * @sb: filesystem it will belong to
1257 * @name: qstr of the name 1180 * @name: qstr of the name
1258 * 1181 *
1259 * Allocates a dentry. It returns %NULL if there is insufficient memory 1182 * Allocates a dentry. It returns %NULL if there is insufficient memory
@@ -1261,7 +1184,7 @@ static struct shrinker dcache_shrinker = {
1261 * copied and the copy passed in may be reused after this call. 1184 * copied and the copy passed in may be reused after this call.
1262 */ 1185 */
1263 1186
1264struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) 1187struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1265{ 1188{
1266 struct dentry *dentry; 1189 struct dentry *dentry;
1267 char *dname; 1190 char *dname;
@@ -1291,8 +1214,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1291 spin_lock_init(&dentry->d_lock); 1214 spin_lock_init(&dentry->d_lock);
1292 seqcount_init(&dentry->d_seq); 1215 seqcount_init(&dentry->d_seq);
1293 dentry->d_inode = NULL; 1216 dentry->d_inode = NULL;
1294 dentry->d_parent = NULL; 1217 dentry->d_parent = dentry;
1295 dentry->d_sb = NULL; 1218 dentry->d_sb = sb;
1296 dentry->d_op = NULL; 1219 dentry->d_op = NULL;
1297 dentry->d_fsdata = NULL; 1220 dentry->d_fsdata = NULL;
1298 INIT_HLIST_BL_NODE(&dentry->d_hash); 1221 INIT_HLIST_BL_NODE(&dentry->d_hash);
@@ -1300,36 +1223,47 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1300 INIT_LIST_HEAD(&dentry->d_subdirs); 1223 INIT_LIST_HEAD(&dentry->d_subdirs);
1301 INIT_LIST_HEAD(&dentry->d_alias); 1224 INIT_LIST_HEAD(&dentry->d_alias);
1302 INIT_LIST_HEAD(&dentry->d_u.d_child); 1225 INIT_LIST_HEAD(&dentry->d_u.d_child);
1303 1226 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1304 if (parent) {
1305 spin_lock(&parent->d_lock);
1306 /*
1307 * don't need child lock because it is not subject
1308 * to concurrency here
1309 */
1310 __dget_dlock(parent);
1311 dentry->d_parent = parent;
1312 dentry->d_sb = parent->d_sb;
1313 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1314 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1315 spin_unlock(&parent->d_lock);
1316 }
1317 1227
1318 this_cpu_inc(nr_dentry); 1228 this_cpu_inc(nr_dentry);
1319 1229
1320 return dentry; 1230 return dentry;
1321} 1231}
1232
1233/**
1234 * d_alloc - allocate a dcache entry
1235 * @parent: parent of entry to allocate
1236 * @name: qstr of the name
1237 *
1238 * Allocates a dentry. It returns %NULL if there is insufficient memory
1239 * available. On a success the dentry is returned. The name passed in is
1240 * copied and the copy passed in may be reused after this call.
1241 */
1242struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1243{
1244 struct dentry *dentry = __d_alloc(parent->d_sb, name);
1245 if (!dentry)
1246 return NULL;
1247
1248 spin_lock(&parent->d_lock);
1249 /*
1250 * don't need child lock because it is not subject
1251 * to concurrency here
1252 */
1253 __dget_dlock(parent);
1254 dentry->d_parent = parent;
1255 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1256 spin_unlock(&parent->d_lock);
1257
1258 return dentry;
1259}
1322EXPORT_SYMBOL(d_alloc); 1260EXPORT_SYMBOL(d_alloc);
1323 1261
1324struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) 1262struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1325{ 1263{
1326 struct dentry *dentry = d_alloc(NULL, name); 1264 struct dentry *dentry = __d_alloc(sb, name);
1327 if (dentry) { 1265 if (dentry)
1328 dentry->d_sb = sb;
1329 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1330 dentry->d_parent = dentry;
1331 dentry->d_flags |= DCACHE_DISCONNECTED; 1266 dentry->d_flags |= DCACHE_DISCONNECTED;
1332 }
1333 return dentry; 1267 return dentry;
1334} 1268}
1335EXPORT_SYMBOL(d_alloc_pseudo); 1269EXPORT_SYMBOL(d_alloc_pseudo);
@@ -1499,13 +1433,9 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1499 if (root_inode) { 1433 if (root_inode) {
1500 static const struct qstr name = { .name = "/", .len = 1 }; 1434 static const struct qstr name = { .name = "/", .len = 1 };
1501 1435
1502 res = d_alloc(NULL, &name); 1436 res = __d_alloc(root_inode->i_sb, &name);
1503 if (res) { 1437 if (res)
1504 res->d_sb = root_inode->i_sb;
1505 d_set_d_op(res, res->d_sb->s_d_op);
1506 res->d_parent = res;
1507 d_instantiate(res, root_inode); 1438 d_instantiate(res, root_inode);
1508 }
1509 } 1439 }
1510 return res; 1440 return res;
1511} 1441}
@@ -1566,13 +1496,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1566 if (res) 1496 if (res)
1567 goto out_iput; 1497 goto out_iput;
1568 1498
1569 tmp = d_alloc(NULL, &anonstring); 1499 tmp = __d_alloc(inode->i_sb, &anonstring);
1570 if (!tmp) { 1500 if (!tmp) {
1571 res = ERR_PTR(-ENOMEM); 1501 res = ERR_PTR(-ENOMEM);
1572 goto out_iput; 1502 goto out_iput;
1573 } 1503 }
1574 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1575
1576 1504
1577 spin_lock(&inode->i_lock); 1505 spin_lock(&inode->i_lock);
1578 res = __d_find_any_alias(inode); 1506 res = __d_find_any_alias(inode);
@@ -1584,8 +1512,6 @@ struct dentry *d_obtain_alias(struct inode *inode)
1584 1512
1585 /* attach a disconnected dentry */ 1513 /* attach a disconnected dentry */
1586 spin_lock(&tmp->d_lock); 1514 spin_lock(&tmp->d_lock);
1587 tmp->d_sb = inode->i_sb;
1588 d_set_d_op(tmp, tmp->d_sb->s_d_op);
1589 tmp->d_inode = inode; 1515 tmp->d_inode = inode;
1590 tmp->d_flags |= DCACHE_DISCONNECTED; 1516 tmp->d_flags |= DCACHE_DISCONNECTED;
1591 list_add(&tmp->d_alias, &inode->i_dentry); 1517 list_add(&tmp->d_alias, &inode->i_dentry);
@@ -1626,6 +1552,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1626{ 1552{
1627 struct dentry *new = NULL; 1553 struct dentry *new = NULL;
1628 1554
1555 if (IS_ERR(inode))
1556 return ERR_CAST(inode);
1557
1629 if (inode && S_ISDIR(inode->i_mode)) { 1558 if (inode && S_ISDIR(inode->i_mode)) {
1630 spin_lock(&inode->i_lock); 1559 spin_lock(&inode->i_lock);
1631 new = __d_find_alias(inode, 1); 1560 new = __d_find_alias(inode, 1);
@@ -1708,29 +1637,22 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1708 } 1637 }
1709 1638
1710 /* 1639 /*
1711 * Negative dentry: instantiate it unless the inode is a directory and 1640 * We are going to instantiate this dentry, unhash it and clear the
1712 * already has a dentry. 1641 * lookup flag so we can do that.
1713 */ 1642 */
1714 spin_lock(&inode->i_lock); 1643 if (unlikely(d_need_lookup(found)))
1715 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1644 d_clear_need_lookup(found);
1716 __d_instantiate(found, inode);
1717 spin_unlock(&inode->i_lock);
1718 security_d_instantiate(found, inode);
1719 return found;
1720 }
1721 1645
1722 /* 1646 /*
1723 * In case a directory already has a (disconnected) entry grab a 1647 * Negative dentry: instantiate it unless the inode is a directory and
1724 * reference to it, move it in place and use it. 1648 * already has a dentry.
1725 */ 1649 */
1726 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1650 new = d_splice_alias(inode, found);
1727 __dget(new); 1651 if (new) {
1728 spin_unlock(&inode->i_lock); 1652 dput(found);
1729 security_d_instantiate(found, inode); 1653 found = new;
1730 d_move(new, found); 1654 }
1731 iput(inode); 1655 return found;
1732 dput(found);
1733 return new;
1734 1656
1735err_out: 1657err_out:
1736 iput(inode); 1658 iput(inode);
@@ -3045,8 +2967,6 @@ static void __init dcache_init(void)
3045 */ 2967 */
3046 dentry_cache = KMEM_CACHE(dentry, 2968 dentry_cache = KMEM_CACHE(dentry,
3047 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); 2969 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3048
3049 register_shrinker(&dcache_shrinker);
3050 2970
3051 /* Hash may have been set up in dcache_init_early */ 2971 /* Hash may have been set up in dcache_init_early */
3052 if (!hashdist) 2972 if (!hashdist)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ac5f164170e3..01d2d9ef609c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -135,6 +135,50 @@ struct dio {
135 struct page *pages[DIO_PAGES]; /* page buffer */ 135 struct page *pages[DIO_PAGES]; /* page buffer */
136}; 136};
137 137
138static void __inode_dio_wait(struct inode *inode)
139{
140 wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
141 DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
142
143 do {
144 prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
145 if (atomic_read(&inode->i_dio_count))
146 schedule();
147 } while (atomic_read(&inode->i_dio_count));
148 finish_wait(wq, &q.wait);
149}
150
151/**
152 * inode_dio_wait - wait for outstanding DIO requests to finish
153 * @inode: inode to wait for
154 *
155 * Waits for all pending direct I/O requests to finish so that we can
156 * proceed with a truncate or equivalent operation.
157 *
158 * Must be called under a lock that serializes taking new references
159 * to i_dio_count, usually by inode->i_mutex.
160 */
161void inode_dio_wait(struct inode *inode)
162{
163 if (atomic_read(&inode->i_dio_count))
164 __inode_dio_wait(inode);
165}
166EXPORT_SYMBOL_GPL(inode_dio_wait);
167
168/*
169 * inode_dio_done - signal finish of a direct I/O request
170 * @inode: inode the direct I/O happens on
171 *
172 * This is called once we've finished processing a direct I/O request,
173 * and is used to wake up callers waiting for direct I/O to be quiesced.
174 */
175void inode_dio_done(struct inode *inode)
176{
177 if (atomic_dec_and_test(&inode->i_dio_count))
178 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
179}
180EXPORT_SYMBOL_GPL(inode_dio_done);
181
138/* 182/*
139 * How many pages are in the queue? 183 * How many pages are in the queue?
140 */ 184 */
@@ -249,14 +293,12 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
249 if (dio->end_io && dio->result) { 293 if (dio->end_io && dio->result) {
250 dio->end_io(dio->iocb, offset, transferred, 294 dio->end_io(dio->iocb, offset, transferred,
251 dio->map_bh.b_private, ret, is_async); 295 dio->map_bh.b_private, ret, is_async);
252 } else if (is_async) { 296 } else {
253 aio_complete(dio->iocb, ret, 0); 297 if (is_async)
298 aio_complete(dio->iocb, ret, 0);
299 inode_dio_done(dio->inode);
254 } 300 }
255 301
256 if (dio->flags & DIO_LOCKING)
257 /* lockdep: non-owner release */
258 up_read_non_owner(&dio->inode->i_alloc_sem);
259
260 return ret; 302 return ret;
261} 303}
262 304
@@ -980,9 +1022,6 @@ out:
980 return ret; 1022 return ret;
981} 1023}
982 1024
983/*
984 * Releases both i_mutex and i_alloc_sem
985 */
986static ssize_t 1025static ssize_t
987direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 1026direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
988 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 1027 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
@@ -1146,15 +1185,16 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1146 * For writes this function is called under i_mutex and returns with 1185 * For writes this function is called under i_mutex and returns with
1147 * i_mutex held, for reads, i_mutex is not held on entry, but it is 1186 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1148 * taken and dropped again before returning. 1187 * taken and dropped again before returning.
1149 * For reads and writes i_alloc_sem is taken in shared mode and released
1150 * on I/O completion (which may happen asynchronously after returning to
1151 * the caller).
1152 *
1153 * - if the flags value does NOT contain DIO_LOCKING we don't use any 1188 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1154 * internal locking but rather rely on the filesystem to synchronize 1189 * internal locking but rather rely on the filesystem to synchronize
1155 * direct I/O reads/writes versus each other and truncate. 1190 * direct I/O reads/writes versus each other and truncate.
1156 * For reads and writes both i_mutex and i_alloc_sem are not held on 1191 *
1157 * entry and are never taken. 1192 * To help with locking against truncate we increment the i_dio_count
1193 * counter before starting direct I/O, and decrement it once we are done.
1194 * Truncate can wait for it to reach zero to provide exclusion. It is
1195 * expected that filesystems provide exclusion between new direct I/O
1196 * and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
1197 * but other filesystems need to take care of this on their own.
1158 */ 1198 */
1159ssize_t 1199ssize_t
1160__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1200__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
@@ -1200,6 +1240,10 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1200 } 1240 }
1201 } 1241 }
1202 1242
1243 /* watch out for a 0 len io from a tricksy fs */
1244 if (rw == READ && end == offset)
1245 return 0;
1246
1203 dio = kmalloc(sizeof(*dio), GFP_KERNEL); 1247 dio = kmalloc(sizeof(*dio), GFP_KERNEL);
1204 retval = -ENOMEM; 1248 retval = -ENOMEM;
1205 if (!dio) 1249 if (!dio)
@@ -1213,8 +1257,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1213 1257
1214 dio->flags = flags; 1258 dio->flags = flags;
1215 if (dio->flags & DIO_LOCKING) { 1259 if (dio->flags & DIO_LOCKING) {
1216 /* watch out for a 0 len io from a tricksy fs */ 1260 if (rw == READ) {
1217 if (rw == READ && end > offset) {
1218 struct address_space *mapping = 1261 struct address_space *mapping =
1219 iocb->ki_filp->f_mapping; 1262 iocb->ki_filp->f_mapping;
1220 1263
@@ -1229,15 +1272,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1229 goto out; 1272 goto out;
1230 } 1273 }
1231 } 1274 }
1232
1233 /*
1234 * Will be released at I/O completion, possibly in a
1235 * different thread.
1236 */
1237 down_read_non_owner(&inode->i_alloc_sem);
1238 } 1275 }
1239 1276
1240 /* 1277 /*
1278 * Will be decremented at I/O completion time.
1279 */
1280 atomic_inc(&inode->i_dio_count);
1281
1282 /*
1241 * For file extending writes updating i_size before data 1283 * For file extending writes updating i_size before data
1242 * writeouts complete can expose uninitialized blocks. So 1284 * writeouts complete can expose uninitialized blocks. So
1243 * even for AIO, we need to wait for i/o to complete before 1285 * even for AIO, we need to wait for i/o to complete before
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 4ec9eb00a241..c6ac98cf9baa 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -270,14 +270,15 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
270} 270}
271 271
272static int 272static int
273ecryptfs_fsync(struct file *file, int datasync) 273ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
274{ 274{
275 int rc = 0; 275 int rc = 0;
276 276
277 rc = generic_file_fsync(file, datasync); 277 rc = generic_file_fsync(file, start, end, datasync);
278 if (rc) 278 if (rc)
279 goto out; 279 goto out;
280 rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync); 280 rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end,
281 datasync);
281out: 282out:
282 return rc; 283 return rc;
283} 284}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 7349ade17de6..340c657a108c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -147,7 +147,6 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
147 * @lower_dir_inode: inode of the parent in the lower fs of the new file 147 * @lower_dir_inode: inode of the parent in the lower fs of the new file
148 * @dentry: New file's dentry 148 * @dentry: New file's dentry
149 * @mode: The mode of the new file 149 * @mode: The mode of the new file
150 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
151 * 150 *
152 * Creates the file in the lower file system. 151 * Creates the file in the lower file system.
153 * 152 *
@@ -155,31 +154,10 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
155 */ 154 */
156static int 155static int
157ecryptfs_create_underlying_file(struct inode *lower_dir_inode, 156ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
158 struct dentry *dentry, int mode, 157 struct dentry *dentry, int mode)
159 struct nameidata *nd)
160{ 158{
161 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 159 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
162 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 160 return vfs_create(lower_dir_inode, lower_dentry, mode, NULL);
163 struct dentry *dentry_save;
164 struct vfsmount *vfsmount_save;
165 unsigned int flags_save;
166 int rc;
167
168 if (nd) {
169 dentry_save = nd->path.dentry;
170 vfsmount_save = nd->path.mnt;
171 flags_save = nd->flags;
172 nd->path.dentry = lower_dentry;
173 nd->path.mnt = lower_mnt;
174 nd->flags &= ~LOOKUP_OPEN;
175 }
176 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
177 if (nd) {
178 nd->path.dentry = dentry_save;
179 nd->path.mnt = vfsmount_save;
180 nd->flags = flags_save;
181 }
182 return rc;
183} 161}
184 162
185/** 163/**
@@ -197,8 +175,7 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
197 */ 175 */
198static int 176static int
199ecryptfs_do_create(struct inode *directory_inode, 177ecryptfs_do_create(struct inode *directory_inode,
200 struct dentry *ecryptfs_dentry, int mode, 178 struct dentry *ecryptfs_dentry, int mode)
201 struct nameidata *nd)
202{ 179{
203 int rc; 180 int rc;
204 struct dentry *lower_dentry; 181 struct dentry *lower_dentry;
@@ -213,7 +190,7 @@ ecryptfs_do_create(struct inode *directory_inode,
213 goto out; 190 goto out;
214 } 191 }
215 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, 192 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
216 ecryptfs_dentry, mode, nd); 193 ecryptfs_dentry, mode);
217 if (rc) { 194 if (rc) {
218 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 195 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
219 "rc = [%d]\n", __func__, rc); 196 "rc = [%d]\n", __func__, rc);
@@ -294,7 +271,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
294 int rc; 271 int rc;
295 272
296 /* ecryptfs_do_create() calls ecryptfs_interpose() */ 273 /* ecryptfs_do_create() calls ecryptfs_interpose() */
297 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); 274 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode);
298 if (unlikely(rc)) { 275 if (unlikely(rc)) {
299 ecryptfs_printk(KERN_WARNING, "Failed to create file in" 276 ecryptfs_printk(KERN_WARNING, "Failed to create file in"
300 "lower filesystem\n"); 277 "lower filesystem\n");
@@ -942,10 +919,8 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
942} 919}
943 920
944static int 921static int
945ecryptfs_permission(struct inode *inode, int mask, unsigned int flags) 922ecryptfs_permission(struct inode *inode, int mask)
946{ 923{
947 if (flags & IPERM_FLAG_RCU)
948 return -ECHILD;
949 return inode_permission(ecryptfs_inode_to_lower(inode), mask); 924 return inode_permission(ecryptfs_inode_to_lower(inode), mask);
950} 925}
951 926
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 1511bf9e5f80..832b10ded82f 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -60,14 +60,11 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
60 60
61struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { 61struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) {
62 efs_ino_t inodenum; 62 efs_ino_t inodenum;
63 struct inode * inode = NULL; 63 struct inode *inode = NULL;
64 64
65 inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); 65 inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
66 if (inodenum) { 66 if (inodenum)
67 inode = efs_iget(dir->i_sb, inodenum); 67 inode = efs_iget(dir->i_sb, inodenum);
68 if (IS_ERR(inode))
69 return ERR_CAST(inode);
70 }
71 68
72 return d_splice_alias(inode, dentry); 69 return d_splice_alias(inode, dentry);
73} 70}
diff --git a/fs/exec.c b/fs/exec.c
index d9576f261815..842d5700c155 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1114,6 +1114,13 @@ out:
1114} 1114}
1115EXPORT_SYMBOL(flush_old_exec); 1115EXPORT_SYMBOL(flush_old_exec);
1116 1116
1117void would_dump(struct linux_binprm *bprm, struct file *file)
1118{
1119 if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
1120 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1121}
1122EXPORT_SYMBOL(would_dump);
1123
1117void setup_new_exec(struct linux_binprm * bprm) 1124void setup_new_exec(struct linux_binprm * bprm)
1118{ 1125{
1119 int i, ch; 1126 int i, ch;
@@ -1153,9 +1160,10 @@ void setup_new_exec(struct linux_binprm * bprm)
1153 if (bprm->cred->uid != current_euid() || 1160 if (bprm->cred->uid != current_euid() ||
1154 bprm->cred->gid != current_egid()) { 1161 bprm->cred->gid != current_egid()) {
1155 current->pdeath_signal = 0; 1162 current->pdeath_signal = 0;
1156 } else if (file_permission(bprm->file, MAY_READ) || 1163 } else {
1157 bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) { 1164 would_dump(bprm, bprm->file);
1158 set_dumpable(current->mm, suid_dumpable); 1165 if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
1166 set_dumpable(current->mm, suid_dumpable);
1159 } 1167 }
1160 1168
1161 /* 1169 /*
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 45ca323d8363..491c6c078e7f 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -42,11 +42,19 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
42 * Note, in exofs all metadata is written as part of inode, regardless. 42 * Note, in exofs all metadata is written as part of inode, regardless.
43 * The writeout is synchronous 43 * The writeout is synchronous
44 */ 44 */
45static int exofs_file_fsync(struct file *filp, int datasync) 45static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
46 int datasync)
46{ 47{
48 struct inode *inode = filp->f_mapping->host;
47 int ret; 49 int ret;
48 50
51 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
52 if (ret)
53 return ret;
54
55 mutex_lock(&inode->i_mutex);
49 ret = sync_inode_metadata(filp->f_mapping->host, 1); 56 ret = sync_inode_metadata(filp->f_mapping->host, 1);
57 mutex_unlock(&inode->i_mutex);
50 return ret; 58 return ret;
51} 59}
52 60
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4d70db110cfc..b54c43775f17 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -55,12 +55,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
55 return ERR_PTR(-ENAMETOOLONG); 55 return ERR_PTR(-ENAMETOOLONG);
56 56
57 ino = exofs_inode_by_name(dir, dentry); 57 ino = exofs_inode_by_name(dir, dentry);
58 inode = NULL; 58 inode = ino ? exofs_iget(dir->i_sb, ino) : NULL;
59 if (ino) {
60 inode = exofs_iget(dir->i_sb, ino);
61 if (IS_ERR(inode))
62 return ERR_CAST(inode);
63 }
64 return d_splice_alias(inode, dentry); 59 return d_splice_alias(inode, dentry);
65} 60}
66 61
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index abea5a17c764..bfe651f9ae16 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,11 +232,11 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
232} 232}
233 233
234int 234int
235ext2_check_acl(struct inode *inode, int mask, unsigned int flags) 235ext2_check_acl(struct inode *inode, int mask)
236{ 236{
237 struct posix_acl *acl; 237 struct posix_acl *acl;
238 238
239 if (flags & IPERM_FLAG_RCU) { 239 if (mask & MAY_NOT_BLOCK) {
240 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 240 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
241 return -ECHILD; 241 return -ECHILD;
242 return -EAGAIN; 242 return -EAGAIN;
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index c939b7b12099..3ff6cbb9ac44 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_check_acl (struct inode *, int, unsigned int); 57extern int ext2_check_acl (struct inode *, int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 645be9e7ee47..af9fc89b1b2d 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -150,7 +150,8 @@ extern void ext2_write_super (struct super_block *);
150extern const struct file_operations ext2_dir_operations; 150extern const struct file_operations ext2_dir_operations;
151 151
152/* file.c */ 152/* file.c */
153extern int ext2_fsync(struct file *file, int datasync); 153extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
154 int datasync);
154extern const struct inode_operations ext2_file_inode_operations; 155extern const struct inode_operations ext2_file_inode_operations;
155extern const struct file_operations ext2_file_operations; 156extern const struct file_operations ext2_file_operations;
156extern const struct file_operations ext2_xip_file_operations; 157extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 49eec9456c5b..82e06321de35 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
40 return 0; 40 return 0;
41} 41}
42 42
43int ext2_fsync(struct file *file, int datasync) 43int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
44{ 44{
45 int ret; 45 int ret;
46 struct super_block *sb = file->f_mapping->host->i_sb; 46 struct super_block *sb = file->f_mapping->host->i_sb;
47 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; 47 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
48 48
49 ret = generic_file_fsync(file, datasync); 49 ret = generic_file_fsync(file, start, end, datasync);
50 if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { 50 if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
51 /* We don't really know where the IO error happened... */ 51 /* We don't really know where the IO error happened... */
52 ext2_error(sb, __func__, 52 ext2_error(sb, __func__,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 788e09a07f7e..a8a58f63f07c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -843,8 +843,8 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
843 struct inode *inode = mapping->host; 843 struct inode *inode = mapping->host;
844 ssize_t ret; 844 ssize_t ret;
845 845
846 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 846 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
847 iov, offset, nr_segs, ext2_get_block, NULL); 847 ext2_get_block);
848 if (ret < 0 && (rw & WRITE)) 848 if (ret < 0 && (rw & WRITE))
849 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); 849 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
850 return ret; 850 return ret;
@@ -1184,6 +1184,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
1184 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1184 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1185 return -EPERM; 1185 return -EPERM;
1186 1186
1187 inode_dio_wait(inode);
1188
1187 if (mapping_is_xip(inode->i_mapping)) 1189 if (mapping_is_xip(inode->i_mapping))
1188 error = xip_truncate_page(inode->i_mapping, newsize); 1190 error = xip_truncate_page(inode->i_mapping, newsize);
1189 else if (test_opt(inode->i_sb, NOBH)) 1191 else if (test_opt(inode->i_sb, NOBH))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index ed5c5d496ee9..d60b7099e2db 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -67,15 +67,11 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
67 inode = NULL; 67 inode = NULL;
68 if (ino) { 68 if (ino) {
69 inode = ext2_iget(dir->i_sb, ino); 69 inode = ext2_iget(dir->i_sb, ino);
70 if (IS_ERR(inode)) { 70 if (inode == ERR_PTR(-ESTALE)) {
71 if (PTR_ERR(inode) == -ESTALE) { 71 ext2_error(dir->i_sb, __func__,
72 ext2_error(dir->i_sb, __func__, 72 "deleted inode referenced: %lu",
73 "deleted inode referenced: %lu", 73 (unsigned long) ino);
74 (unsigned long) ino); 74 return ERR_PTR(-EIO);
75 return ERR_PTR(-EIO);
76 } else {
77 return ERR_CAST(inode);
78 }
79 } 75 }
80 } 76 }
81 return d_splice_alias(inode, dentry); 77 return d_splice_alias(inode, dentry);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 9d021c0d472a..edfeb293d4cb 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,11 +240,11 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
240} 240}
241 241
242int 242int
243ext3_check_acl(struct inode *inode, int mask, unsigned int flags) 243ext3_check_acl(struct inode *inode, int mask)
244{ 244{
245 struct posix_acl *acl; 245 struct posix_acl *acl;
246 246
247 if (flags & IPERM_FLAG_RCU) { 247 if (mask & MAY_NOT_BLOCK) {
248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
249 return -ECHILD; 249 return -ECHILD;
250 return -EAGAIN; 250 return -EAGAIN;
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 5faf8048e906..597334626de9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_check_acl (struct inode *, int, unsigned int); 57extern int ext3_check_acl (struct inode *, int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb34c94..0bcf63adb80a 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -43,7 +43,7 @@
43 * inode to disk. 43 * inode to disk.
44 */ 44 */
45 45
46int ext3_sync_file(struct file *file, int datasync) 46int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
47{ 47{
48 struct inode *inode = file->f_mapping->host; 48 struct inode *inode = file->f_mapping->host;
49 struct ext3_inode_info *ei = EXT3_I(inode); 49 struct ext3_inode_info *ei = EXT3_I(inode);
@@ -54,6 +54,17 @@ int ext3_sync_file(struct file *file, int datasync)
54 if (inode->i_sb->s_flags & MS_RDONLY) 54 if (inode->i_sb->s_flags & MS_RDONLY)
55 return 0; 55 return 0;
56 56
57 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
58 if (ret)
59 return ret;
60
61 /*
62 * Taking the mutex here just to keep consistent with how fsync was
63 * called previously, however it looks like we don't need to take
64 * i_mutex at all.
65 */
66 mutex_lock(&inode->i_mutex);
67
57 J_ASSERT(ext3_journal_current_handle() == NULL); 68 J_ASSERT(ext3_journal_current_handle() == NULL);
58 69
59 /* 70 /*
@@ -70,8 +81,10 @@ int ext3_sync_file(struct file *file, int datasync)
70 * (they were dirtied by commit). But that's OK - the blocks are 81 * (they were dirtied by commit). But that's OK - the blocks are
71 * safe in-journal, which is all fsync() needs to ensure. 82 * safe in-journal, which is all fsync() needs to ensure.
72 */ 83 */
73 if (ext3_should_journal_data(inode)) 84 if (ext3_should_journal_data(inode)) {
85 mutex_unlock(&inode->i_mutex);
74 return ext3_force_commit(inode->i_sb); 86 return ext3_force_commit(inode->i_sb);
87 }
75 88
76 if (datasync) 89 if (datasync)
77 commit_tid = atomic_read(&ei->i_datasync_tid); 90 commit_tid = atomic_read(&ei->i_datasync_tid);
@@ -91,5 +104,6 @@ int ext3_sync_file(struct file *file, int datasync)
91 */ 104 */
92 if (needs_barrier) 105 if (needs_barrier)
93 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
107 mutex_unlock(&inode->i_mutex);
94 return ret; 108 return ret;
95} 109}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23c3bae..2978a2a17a59 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1816,9 +1816,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1816 } 1816 }
1817 1817
1818retry: 1818retry:
1819 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1819 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
1820 offset, nr_segs, 1820 ext3_get_block);
1821 ext3_get_block, NULL);
1822 /* 1821 /*
1823 * In case of error extending write may have instantiated a few 1822 * In case of error extending write may have instantiated a few
1824 * blocks outside i_size. Trim these off again. 1823 * blocks outside i_size. Trim these off again.
@@ -3216,6 +3215,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3216 ext3_journal_stop(handle); 3215 ext3_journal_stop(handle);
3217 } 3216 }
3218 3217
3218 if (attr->ia_valid & ATTR_SIZE)
3219 inode_dio_wait(inode);
3220
3219 if (S_ISREG(inode->i_mode) && 3221 if (S_ISREG(inode->i_mode) &&
3220 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 3222 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
3221 handle_t *handle; 3223 handle_t *handle;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 34b6d9bfc48a..c095cf5640c7 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1038,15 +1038,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1038 return ERR_PTR(-EIO); 1038 return ERR_PTR(-EIO);
1039 } 1039 }
1040 inode = ext3_iget(dir->i_sb, ino); 1040 inode = ext3_iget(dir->i_sb, ino);
1041 if (IS_ERR(inode)) { 1041 if (inode == ERR_PTR(-ESTALE)) {
1042 if (PTR_ERR(inode) == -ESTALE) { 1042 ext3_error(dir->i_sb, __func__,
1043 ext3_error(dir->i_sb, __func__, 1043 "deleted inode referenced: %lu",
1044 "deleted inode referenced: %lu", 1044 ino);
1045 ino); 1045 return ERR_PTR(-EIO);
1046 return ERR_PTR(-EIO);
1047 } else {
1048 return ERR_CAST(inode);
1049 }
1050 } 1046 }
1051 } 1047 }
1052 return d_splice_alias(inode, dentry); 1048 return d_splice_alias(inode, dentry);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index aad153ef6b78..b57ea2f91269 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1718,6 +1718,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1718 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1718 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1719 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1719 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1720 1720
1721 /* enable barriers by default */
1722 set_opt(sbi->s_mount_opt, BARRIER);
1721 set_opt(sbi->s_mount_opt, RESERVATION); 1723 set_opt(sbi->s_mount_opt, RESERVATION);
1722 1724
1723 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1725 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 21eacd7b7d79..60d900fcc3db 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,11 +238,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
238} 238}
239 239
240int 240int
241ext4_check_acl(struct inode *inode, int mask, unsigned int flags) 241ext4_check_acl(struct inode *inode, int mask)
242{ 242{
243 struct posix_acl *acl; 243 struct posix_acl *acl;
244 244
245 if (flags & IPERM_FLAG_RCU) { 245 if (mask & MAY_NOT_BLOCK) {
246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
247 return -ECHILD; 247 return -ECHILD;
248 return -EAGAIN; 248 return -EAGAIN;
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index dec821168fd4..9d843d5deac4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_check_acl(struct inode *, int, unsigned int); 57extern int ext4_check_acl(struct inode *, int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1921392cd708..fa44df879711 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1758,7 +1758,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1758extern void ext4_htree_free_dir_info(struct dir_private_info *p); 1758extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1759 1759
1760/* fsync.c */ 1760/* fsync.c */
1761extern int ext4_sync_file(struct file *, int); 1761extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
1762extern int ext4_flush_completed_IO(struct inode *); 1762extern int ext4_flush_completed_IO(struct inode *);
1763 1763
1764/* hash.c */ 1764/* hash.c */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2c0972322009..ce766f974b1d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -236,6 +236,27 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
236 } 236 }
237 offset += file->f_pos; 237 offset += file->f_pos;
238 break; 238 break;
239 case SEEK_DATA:
240 /*
241 * In the generic case the entire file is data, so as long as
242 * offset isn't at the end of the file then the offset is data.
243 */
244 if (offset >= inode->i_size) {
245 mutex_unlock(&inode->i_mutex);
246 return -ENXIO;
247 }
248 break;
249 case SEEK_HOLE:
250 /*
251 * There is a virtual hole at the end of the file, so as long as
252 * offset isn't i_size or larger, return i_size.
253 */
254 if (offset >= inode->i_size) {
255 mutex_unlock(&inode->i_mutex);
256 return -ENXIO;
257 }
258 offset = inode->i_size;
259 break;
239 } 260 }
240 261
241 if (offset < 0 || offset > maxbytes) { 262 if (offset < 0 || offset > maxbytes) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index ce66d2fe826c..da3bed3e0c29 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -151,6 +151,32 @@ static int ext4_sync_parent(struct inode *inode)
151 return ret; 151 return ret;
152} 152}
153 153
154/**
155 * __sync_file - generic_file_fsync without the locking and filemap_write
156 * @inode: inode to sync
157 * @datasync: only sync essential metadata if true
158 *
159 * This is just generic_file_fsync without the locking. This is needed for
160 * nojournal mode to make sure this inodes data/metadata makes it to disk
161 * properly. The i_mutex should be held already.
162 */
163static int __sync_inode(struct inode *inode, int datasync)
164{
165 int err;
166 int ret;
167
168 ret = sync_mapping_buffers(inode->i_mapping);
169 if (!(inode->i_state & I_DIRTY))
170 return ret;
171 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
172 return ret;
173
174 err = sync_inode_metadata(inode, 1);
175 if (ret == 0)
176 ret = err;
177 return ret;
178}
179
154/* 180/*
155 * akpm: A new design for ext4_sync_file(). 181 * akpm: A new design for ext4_sync_file().
156 * 182 *
@@ -165,7 +191,7 @@ static int ext4_sync_parent(struct inode *inode)
165 * i_mutex lock is held when entering and exiting this function 191 * i_mutex lock is held when entering and exiting this function
166 */ 192 */
167 193
168int ext4_sync_file(struct file *file, int datasync) 194int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
169{ 195{
170 struct inode *inode = file->f_mapping->host; 196 struct inode *inode = file->f_mapping->host;
171 struct ext4_inode_info *ei = EXT4_I(inode); 197 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -178,15 +204,20 @@ int ext4_sync_file(struct file *file, int datasync)
178 204
179 trace_ext4_sync_file_enter(file, datasync); 205 trace_ext4_sync_file_enter(file, datasync);
180 206
207 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
208 if (ret)
209 return ret;
210 mutex_lock(&inode->i_mutex);
211
181 if (inode->i_sb->s_flags & MS_RDONLY) 212 if (inode->i_sb->s_flags & MS_RDONLY)
182 return 0; 213 goto out;
183 214
184 ret = ext4_flush_completed_IO(inode); 215 ret = ext4_flush_completed_IO(inode);
185 if (ret < 0) 216 if (ret < 0)
186 goto out; 217 goto out;
187 218
188 if (!journal) { 219 if (!journal) {
189 ret = generic_file_fsync(file, datasync); 220 ret = __sync_inode(inode, datasync);
190 if (!ret && !list_empty(&inode->i_dentry)) 221 if (!ret && !list_empty(&inode->i_dentry))
191 ret = ext4_sync_parent(inode); 222 ret = ext4_sync_parent(inode);
192 goto out; 223 goto out;
@@ -220,6 +251,7 @@ int ext4_sync_file(struct file *file, int datasync)
220 if (needs_barrier) 251 if (needs_barrier)
221 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 252 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
222 out: 253 out:
254 mutex_unlock(&inode->i_mutex);
223 trace_ext4_sync_file_exit(inode, ret); 255 trace_ext4_sync_file_exit(inode, ret);
224 return ret; 256 return ret;
225} 257}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e3126c051006..678cde834f19 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3501,10 +3501,8 @@ retry:
3501 offset, nr_segs, 3501 offset, nr_segs,
3502 ext4_get_block, NULL, NULL, 0); 3502 ext4_get_block, NULL, NULL, 0);
3503 else { 3503 else {
3504 ret = blockdev_direct_IO(rw, iocb, inode, 3504 ret = blockdev_direct_IO(rw, iocb, inode, iov,
3505 inode->i_sb->s_bdev, iov, 3505 offset, nr_segs, ext4_get_block);
3506 offset, nr_segs,
3507 ext4_get_block, NULL);
3508 3506
3509 if (unlikely((rw & WRITE) && ret < 0)) { 3507 if (unlikely((rw & WRITE) && ret < 0)) {
3510 loff_t isize = i_size_read(inode); 3508 loff_t isize = i_size_read(inode);
@@ -3575,6 +3573,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3575 ssize_t size, void *private, int ret, 3573 ssize_t size, void *private, int ret,
3576 bool is_async) 3574 bool is_async)
3577{ 3575{
3576 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
3578 ext4_io_end_t *io_end = iocb->private; 3577 ext4_io_end_t *io_end = iocb->private;
3579 struct workqueue_struct *wq; 3578 struct workqueue_struct *wq;
3580 unsigned long flags; 3579 unsigned long flags;
@@ -3596,6 +3595,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3596out: 3595out:
3597 if (is_async) 3596 if (is_async)
3598 aio_complete(iocb, ret, 0); 3597 aio_complete(iocb, ret, 0);
3598 inode_dio_done(inode);
3599 return; 3599 return;
3600 } 3600 }
3601 3601
@@ -3616,6 +3616,9 @@ out:
3616 /* queue the work to convert unwritten extents to written */ 3616 /* queue the work to convert unwritten extents to written */
3617 queue_work(wq, &io_end->work); 3617 queue_work(wq, &io_end->work);
3618 iocb->private = NULL; 3618 iocb->private = NULL;
3619
3620 /* XXX: probably should move into the real I/O completion handler */
3621 inode_dio_done(inode);
3619} 3622}
3620 3623
3621static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) 3624static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -3748,11 +3751,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3748 EXT4_I(inode)->cur_aio_dio = iocb->private; 3751 EXT4_I(inode)->cur_aio_dio = iocb->private;
3749 } 3752 }
3750 3753
3751 ret = blockdev_direct_IO(rw, iocb, inode, 3754 ret = __blockdev_direct_IO(rw, iocb, inode,
3752 inode->i_sb->s_bdev, iov, 3755 inode->i_sb->s_bdev, iov,
3753 offset, nr_segs, 3756 offset, nr_segs,
3754 ext4_get_block_write, 3757 ext4_get_block_write,
3755 ext4_end_io_dio); 3758 ext4_end_io_dio,
3759 NULL,
3760 DIO_LOCKING | DIO_SKIP_HOLES);
3756 if (iocb->private) 3761 if (iocb->private)
3757 EXT4_I(inode)->cur_aio_dio = NULL; 3762 EXT4_I(inode)->cur_aio_dio = NULL;
3758 /* 3763 /*
@@ -5351,6 +5356,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5351 } 5356 }
5352 5357
5353 if (attr->ia_valid & ATTR_SIZE) { 5358 if (attr->ia_valid & ATTR_SIZE) {
5359 inode_dio_wait(inode);
5360
5354 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 5361 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
5355 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 5362 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5356 5363
@@ -5843,80 +5850,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5843 struct page *page = vmf->page; 5850 struct page *page = vmf->page;
5844 loff_t size; 5851 loff_t size;
5845 unsigned long len; 5852 unsigned long len;
5846 int ret = -EINVAL; 5853 int ret;
5847 void *fsdata;
5848 struct file *file = vma->vm_file; 5854 struct file *file = vma->vm_file;
5849 struct inode *inode = file->f_path.dentry->d_inode; 5855 struct inode *inode = file->f_path.dentry->d_inode;
5850 struct address_space *mapping = inode->i_mapping; 5856 struct address_space *mapping = inode->i_mapping;
5857 handle_t *handle;
5858 get_block_t *get_block;
5859 int retries = 0;
5851 5860
5852 /* 5861 /*
5853 * Get i_alloc_sem to stop truncates messing with the inode. We cannot 5862 * This check is racy but catches the common case. We rely on
5854 * get i_mutex because we are already holding mmap_sem. 5863 * __block_page_mkwrite() to do a reliable check.
5855 */ 5864 */
5856 down_read(&inode->i_alloc_sem); 5865 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
5857 size = i_size_read(inode); 5866 /* Delalloc case is easy... */
5858 if (page->mapping != mapping || size <= page_offset(page) 5867 if (test_opt(inode->i_sb, DELALLOC) &&
5859 || !PageUptodate(page)) { 5868 !ext4_should_journal_data(inode) &&
5860 /* page got truncated from under us? */ 5869 !ext4_nonda_switch(inode->i_sb)) {
5861 goto out_unlock; 5870 do {
5871 ret = __block_page_mkwrite(vma, vmf,
5872 ext4_da_get_block_prep);
5873 } while (ret == -ENOSPC &&
5874 ext4_should_retry_alloc(inode->i_sb, &retries));
5875 goto out_ret;
5862 } 5876 }
5863 ret = 0;
5864 5877
5865 lock_page(page); 5878 lock_page(page);
5866 wait_on_page_writeback(page); 5879 size = i_size_read(inode);
5867 if (PageMappedToDisk(page)) { 5880 /* Page got truncated from under us? */
5868 up_read(&inode->i_alloc_sem); 5881 if (page->mapping != mapping || page_offset(page) > size) {
5869 return VM_FAULT_LOCKED; 5882 unlock_page(page);
5883 ret = VM_FAULT_NOPAGE;
5884 goto out;
5870 } 5885 }
5871 5886
5872 if (page->index == size >> PAGE_CACHE_SHIFT) 5887 if (page->index == size >> PAGE_CACHE_SHIFT)
5873 len = size & ~PAGE_CACHE_MASK; 5888 len = size & ~PAGE_CACHE_MASK;
5874 else 5889 else
5875 len = PAGE_CACHE_SIZE; 5890 len = PAGE_CACHE_SIZE;
5876
5877 /* 5891 /*
5878 * return if we have all the buffers mapped. This avoid 5892 * Return if we have all the buffers mapped. This avoids the need to do
5879 * the need to call write_begin/write_end which does a 5893 * journal_start/journal_stop which can block and take a long time
5880 * journal_start/journal_stop which can block and take
5881 * long time
5882 */ 5894 */
5883 if (page_has_buffers(page)) { 5895 if (page_has_buffers(page)) {
5884 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5896 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5885 ext4_bh_unmapped)) { 5897 ext4_bh_unmapped)) {
5886 up_read(&inode->i_alloc_sem); 5898 /* Wait so that we don't change page under IO */
5887 return VM_FAULT_LOCKED; 5899 wait_on_page_writeback(page);
5900 ret = VM_FAULT_LOCKED;
5901 goto out;
5888 } 5902 }
5889 } 5903 }
5890 unlock_page(page); 5904 unlock_page(page);
5891 /* 5905 /* OK, we need to fill the hole... */
5892 * OK, we need to fill the hole... Do write_begin write_end 5906 if (ext4_should_dioread_nolock(inode))
5893 * to do block allocation/reservation.We are not holding 5907 get_block = ext4_get_block_write;
5894 * inode.i__mutex here. That allow * parallel write_begin, 5908 else
5895 * write_end call. lock_page prevent this from happening 5909 get_block = ext4_get_block;
5896 * on the same page though 5910retry_alloc:
5897 */ 5911 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
5898 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 5912 if (IS_ERR(handle)) {
5899 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
5900 if (ret < 0)
5901 goto out_unlock;
5902 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
5903 len, len, page, fsdata);
5904 if (ret < 0)
5905 goto out_unlock;
5906 ret = 0;
5907
5908 /*
5909 * write_begin/end might have created a dirty page and someone
5910 * could wander in and start the IO. Make sure that hasn't
5911 * happened.
5912 */
5913 lock_page(page);
5914 wait_on_page_writeback(page);
5915 up_read(&inode->i_alloc_sem);
5916 return VM_FAULT_LOCKED;
5917out_unlock:
5918 if (ret)
5919 ret = VM_FAULT_SIGBUS; 5913 ret = VM_FAULT_SIGBUS;
5920 up_read(&inode->i_alloc_sem); 5914 goto out;
5915 }
5916 ret = __block_page_mkwrite(vma, vmf, get_block);
5917 if (!ret && ext4_should_journal_data(inode)) {
5918 if (walk_page_buffers(handle, page_buffers(page), 0,
5919 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
5920 unlock_page(page);
5921 ret = VM_FAULT_SIGBUS;
5922 goto out;
5923 }
5924 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
5925 }
5926 ext4_journal_stop(handle);
5927 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5928 goto retry_alloc;
5929out_ret:
5930 ret = block_page_mkwrite_return(ret);
5931out:
5921 return ret; 5932 return ret;
5922} 5933}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b754b7721f51..707d605bf769 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1037,15 +1037,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1037 return ERR_PTR(-EIO); 1037 return ERR_PTR(-EIO);
1038 } 1038 }
1039 inode = ext4_iget(dir->i_sb, ino); 1039 inode = ext4_iget(dir->i_sb, ino);
1040 if (IS_ERR(inode)) { 1040 if (inode == ERR_PTR(-ESTALE)) {
1041 if (PTR_ERR(inode) == -ESTALE) { 1041 EXT4_ERROR_INODE(dir,
1042 EXT4_ERROR_INODE(dir, 1042 "deleted inode referenced: %u",
1043 "deleted inode referenced: %u", 1043 ino);
1044 ino); 1044 return ERR_PTR(-EIO);
1045 return ERR_PTR(-EIO);
1046 } else {
1047 return ERR_CAST(inode);
1048 }
1049 } 1045 }
1050 } 1046 }
1051 return d_splice_alias(inode, dentry); 1047 return d_splice_alias(inode, dentry);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 8276cc282dec..a5d3853822e0 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -109,6 +109,7 @@ struct msdos_inode_info {
109 int i_attrs; /* unused attribute bits */ 109 int i_attrs; /* unused attribute bits */
110 loff_t i_pos; /* on-disk position of directory entry or 0 */ 110 loff_t i_pos; /* on-disk position of directory entry or 0 */
111 struct hlist_node i_fat_hash; /* hash by i_location */ 111 struct hlist_node i_fat_hash; /* hash by i_location */
112 struct rw_semaphore truncate_lock; /* protect bmap against truncate */
112 struct inode vfs_inode; 113 struct inode vfs_inode;
113}; 114};
114 115
@@ -309,7 +310,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
309extern void fat_truncate_blocks(struct inode *inode, loff_t offset); 310extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
310extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, 311extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
311 struct kstat *stat); 312 struct kstat *stat);
312extern int fat_file_fsync(struct file *file, int datasync); 313extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
314 int datasync);
313 315
314/* fat/inode.c */ 316/* fat/inode.c */
315extern void fat_attach(struct inode *inode, loff_t i_pos); 317extern void fat_attach(struct inode *inode, loff_t i_pos);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7018e1d8902d..c118acf16e43 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -149,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp)
149 return 0; 149 return 0;
150} 150}
151 151
152int fat_file_fsync(struct file *filp, int datasync) 152int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
153{ 153{
154 struct inode *inode = filp->f_mapping->host; 154 struct inode *inode = filp->f_mapping->host;
155 int res, err; 155 int res, err;
156 156
157 res = generic_file_fsync(filp, datasync); 157 res = generic_file_fsync(filp, start, end, datasync);
158 err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); 158 err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
159 159
160 return res ? res : err; 160 return res ? res : err;
@@ -397,6 +397,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
397 * sequence. 397 * sequence.
398 */ 398 */
399 if (attr->ia_valid & ATTR_SIZE) { 399 if (attr->ia_valid & ATTR_SIZE) {
400 inode_dio_wait(inode);
401
400 if (attr->ia_size > inode->i_size) { 402 if (attr->ia_size > inode->i_size) {
401 error = fat_cont_expand(inode, attr->ia_size); 403 error = fat_cont_expand(inode, attr->ia_size);
402 if (error || attr->ia_valid == ATTR_SIZE) 404 if (error || attr->ia_valid == ATTR_SIZE)
@@ -429,8 +431,10 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
429 } 431 }
430 432
431 if (attr->ia_valid & ATTR_SIZE) { 433 if (attr->ia_valid & ATTR_SIZE) {
434 down_write(&MSDOS_I(inode)->truncate_lock);
432 truncate_setsize(inode, attr->ia_size); 435 truncate_setsize(inode, attr->ia_size);
433 fat_truncate_blocks(inode, attr->ia_size); 436 fat_truncate_blocks(inode, attr->ia_size);
437 up_write(&MSDOS_I(inode)->truncate_lock);
434 } 438 }
435 439
436 setattr_copy(inode, attr); 440 setattr_copy(inode, attr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index cb8d8391ac0b..5942fec22c65 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -211,8 +211,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
211 * FAT need to use the DIO_LOCKING for avoiding the race 211 * FAT need to use the DIO_LOCKING for avoiding the race
212 * condition of fat_get_block() and ->truncate(). 212 * condition of fat_get_block() and ->truncate().
213 */ 213 */
214 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 214 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
215 iov, offset, nr_segs, fat_get_block, NULL); 215 fat_get_block);
216 if (ret < 0 && (rw & WRITE)) 216 if (ret < 0 && (rw & WRITE))
217 fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); 217 fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
218 218
@@ -224,9 +224,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
224 sector_t blocknr; 224 sector_t blocknr;
225 225
226 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ 226 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
227 down_read(&mapping->host->i_alloc_sem); 227 down_read(&MSDOS_I(mapping->host)->truncate_lock);
228 blocknr = generic_block_bmap(mapping, block, fat_get_block); 228 blocknr = generic_block_bmap(mapping, block, fat_get_block);
229 up_read(&mapping->host->i_alloc_sem); 229 up_read(&MSDOS_I(mapping->host)->truncate_lock);
230 230
231 return blocknr; 231 return blocknr;
232} 232}
@@ -510,6 +510,8 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
510 ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS); 510 ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
511 if (!ei) 511 if (!ei)
512 return NULL; 512 return NULL;
513
514 init_rwsem(&ei->truncate_lock);
513 return &ei->vfs_inode; 515 return &ei->vfs_inode;
514} 516}
515 517
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3b222dafd15b..66e83b845455 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -209,29 +209,20 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
209 int err; 209 int err;
210 210
211 lock_super(sb); 211 lock_super(sb);
212
213 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); 212 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
214 if (err) { 213 switch (err) {
215 if (err == -ENOENT) { 214 case -ENOENT:
216 inode = NULL; 215 inode = NULL;
217 goto out; 216 break;
218 } 217 case 0:
219 goto error; 218 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
220 } 219 brelse(sinfo.bh);
221 220 break;
222 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 221 default:
223 brelse(sinfo.bh); 222 inode = ERR_PTR(err);
224 if (IS_ERR(inode)) {
225 err = PTR_ERR(inode);
226 goto error;
227 } 223 }
228out:
229 unlock_super(sb); 224 unlock_super(sb);
230 return d_splice_alias(inode, dentry); 225 return d_splice_alias(inode, dentry);
231
232error:
233 unlock_super(sb);
234 return ERR_PTR(err);
235} 226}
236 227
237/***** Creates a directory entry (name is already formatted). */ 228/***** Creates a directory entry (name is already formatted). */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 20b4ea53fdc4..bb3f29c3557b 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -82,10 +82,8 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
82 * case sensitive name which is specified by user if this is 82 * case sensitive name which is specified by user if this is
83 * for creation. 83 * for creation.
84 */ 84 */
85 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 85 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
86 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 86 return 0;
87 return 0;
88 }
89 87
90 return vfat_revalidate_shortname(dentry); 88 return vfat_revalidate_shortname(dentry);
91} 89}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0f015a0468de..b8c507ca42f7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -461,32 +461,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
461} 461}
462 462
463/* 463/*
464 * For background writeback the caller does not have the sb pinned
465 * before calling writeback. So make sure that we do pin it, so it doesn't
466 * go away while we are writing inodes from it.
467 */
468static bool pin_sb_for_writeback(struct super_block *sb)
469{
470 spin_lock(&sb_lock);
471 if (list_empty(&sb->s_instances)) {
472 spin_unlock(&sb_lock);
473 return false;
474 }
475
476 sb->s_count++;
477 spin_unlock(&sb_lock);
478
479 if (down_read_trylock(&sb->s_umount)) {
480 if (sb->s_root)
481 return true;
482 up_read(&sb->s_umount);
483 }
484
485 put_super(sb);
486 return false;
487}
488
489/*
490 * Write a portion of b_io inodes which belong to @sb. 464 * Write a portion of b_io inodes which belong to @sb.
491 * 465 *
492 * If @only_this_sb is true, then find and write all such 466 * If @only_this_sb is true, then find and write all such
@@ -585,7 +559,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
585 struct inode *inode = wb_inode(wb->b_io.prev); 559 struct inode *inode = wb_inode(wb->b_io.prev);
586 struct super_block *sb = inode->i_sb; 560 struct super_block *sb = inode->i_sb;
587 561
588 if (!pin_sb_for_writeback(sb)) { 562 if (!grab_super_passive(sb)) {
589 requeue_io(inode); 563 requeue_io(inode);
590 continue; 564 continue;
591 } 565 }
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d50160714595..9f63e493a9b6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -382,7 +382,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
382 struct fuse_entry_out outentry; 382 struct fuse_entry_out outentry;
383 struct fuse_file *ff; 383 struct fuse_file *ff;
384 struct file *file; 384 struct file *file;
385 int flags = nd->intent.open.flags - 1; 385 int flags = nd->intent.open.flags;
386 386
387 if (fc->no_create) 387 if (fc->no_create)
388 return -ENOSYS; 388 return -ENOSYS;
@@ -576,7 +576,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
576static int fuse_create(struct inode *dir, struct dentry *entry, int mode, 576static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
577 struct nameidata *nd) 577 struct nameidata *nd)
578{ 578{
579 if (nd && (nd->flags & LOOKUP_OPEN)) { 579 if (nd) {
580 int err = fuse_create_open(dir, entry, mode, nd); 580 int err = fuse_create_open(dir, entry, mode, nd);
581 if (err != -ENOSYS) 581 if (err != -ENOSYS)
582 return err; 582 return err;
@@ -971,9 +971,9 @@ static int fuse_access(struct inode *inode, int mask)
971 return err; 971 return err;
972} 972}
973 973
974static int fuse_perm_getattr(struct inode *inode, int flags) 974static int fuse_perm_getattr(struct inode *inode, int mask)
975{ 975{
976 if (flags & IPERM_FLAG_RCU) 976 if (mask & MAY_NOT_BLOCK)
977 return -ECHILD; 977 return -ECHILD;
978 978
979 return fuse_do_getattr(inode, NULL, NULL); 979 return fuse_do_getattr(inode, NULL, NULL);
@@ -992,7 +992,7 @@ static int fuse_perm_getattr(struct inode *inode, int flags)
992 * access request is sent. Execute permission is still checked 992 * access request is sent. Execute permission is still checked
993 * locally based on file mode. 993 * locally based on file mode.
994 */ 994 */
995static int fuse_permission(struct inode *inode, int mask, unsigned int flags) 995static int fuse_permission(struct inode *inode, int mask)
996{ 996{
997 struct fuse_conn *fc = get_fuse_conn(inode); 997 struct fuse_conn *fc = get_fuse_conn(inode);
998 bool refreshed = false; 998 bool refreshed = false;
@@ -1011,23 +1011,22 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1011 if (fi->i_time < get_jiffies_64()) { 1011 if (fi->i_time < get_jiffies_64()) {
1012 refreshed = true; 1012 refreshed = true;
1013 1013
1014 err = fuse_perm_getattr(inode, flags); 1014 err = fuse_perm_getattr(inode, mask);
1015 if (err) 1015 if (err)
1016 return err; 1016 return err;
1017 } 1017 }
1018 } 1018 }
1019 1019
1020 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1020 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1021 err = generic_permission(inode, mask, flags, NULL); 1021 err = generic_permission(inode, mask);
1022 1022
1023 /* If permission is denied, try to refresh file 1023 /* If permission is denied, try to refresh file
1024 attributes. This is also needed, because the root 1024 attributes. This is also needed, because the root
1025 node will at first have no permissions */ 1025 node will at first have no permissions */
1026 if (err == -EACCES && !refreshed) { 1026 if (err == -EACCES && !refreshed) {
1027 err = fuse_perm_getattr(inode, flags); 1027 err = fuse_perm_getattr(inode, mask);
1028 if (!err) 1028 if (!err)
1029 err = generic_permission(inode, mask, 1029 err = generic_permission(inode, mask);
1030 flags, NULL);
1031 } 1030 }
1032 1031
1033 /* Note: the opposite of the above test does not 1032 /* Note: the opposite of the above test does not
@@ -1035,7 +1034,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1035 noticed immediately, only after the attribute 1034 noticed immediately, only after the attribute
1036 timeout has expired */ 1035 timeout has expired */
1037 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1036 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1038 if (flags & IPERM_FLAG_RCU) 1037 if (mask & MAY_NOT_BLOCK)
1039 return -ECHILD; 1038 return -ECHILD;
1040 1039
1041 err = fuse_access(inode, mask); 1040 err = fuse_access(inode, mask);
@@ -1044,7 +1043,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1044 if (refreshed) 1043 if (refreshed)
1045 return -EACCES; 1044 return -EACCES;
1046 1045
1047 err = fuse_perm_getattr(inode, flags); 1046 err = fuse_perm_getattr(inode, mask);
1048 if (!err && !(inode->i_mode & S_IXUGO)) 1047 if (!err && !(inode->i_mode & S_IXUGO))
1049 return -EACCES; 1048 return -EACCES;
1050 } 1049 }
@@ -1177,9 +1176,10 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
1177 return 0; 1176 return 0;
1178} 1177}
1179 1178
1180static int fuse_dir_fsync(struct file *file, int datasync) 1179static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1180 int datasync)
1181{ 1181{
1182 return fuse_fsync_common(file, datasync, 1); 1182 return fuse_fsync_common(file, start, end, datasync, 1);
1183} 1183}
1184 1184
1185static bool update_mtime(unsigned ivalid) 1185static bool update_mtime(unsigned ivalid)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 82a66466a24c..7bb685cdd00c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -400,7 +400,8 @@ static void fuse_sync_writes(struct inode *inode)
400 fuse_release_nowrite(inode); 400 fuse_release_nowrite(inode);
401} 401}
402 402
403int fuse_fsync_common(struct file *file, int datasync, int isdir) 403int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
404 int datasync, int isdir)
404{ 405{
405 struct inode *inode = file->f_mapping->host; 406 struct inode *inode = file->f_mapping->host;
406 struct fuse_conn *fc = get_fuse_conn(inode); 407 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -412,9 +413,15 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
412 if (is_bad_inode(inode)) 413 if (is_bad_inode(inode))
413 return -EIO; 414 return -EIO;
414 415
416 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
417 if (err)
418 return err;
419
415 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 420 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
416 return 0; 421 return 0;
417 422
423 mutex_lock(&inode->i_mutex);
424
418 /* 425 /*
419 * Start writeback against all dirty pages of the inode, then 426 * Start writeback against all dirty pages of the inode, then
420 * wait for all outstanding writes, before sending the FSYNC 427 * wait for all outstanding writes, before sending the FSYNC
@@ -422,13 +429,15 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
422 */ 429 */
423 err = write_inode_now(inode, 0); 430 err = write_inode_now(inode, 0);
424 if (err) 431 if (err)
425 return err; 432 goto out;
426 433
427 fuse_sync_writes(inode); 434 fuse_sync_writes(inode);
428 435
429 req = fuse_get_req(fc); 436 req = fuse_get_req(fc);
430 if (IS_ERR(req)) 437 if (IS_ERR(req)) {
431 return PTR_ERR(req); 438 err = PTR_ERR(req);
439 goto out;
440 }
432 441
433 memset(&inarg, 0, sizeof(inarg)); 442 memset(&inarg, 0, sizeof(inarg));
434 inarg.fh = ff->fh; 443 inarg.fh = ff->fh;
@@ -448,12 +457,15 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
448 fc->no_fsync = 1; 457 fc->no_fsync = 1;
449 err = 0; 458 err = 0;
450 } 459 }
460out:
461 mutex_unlock(&inode->i_mutex);
451 return err; 462 return err;
452} 463}
453 464
454static int fuse_fsync(struct file *file, int datasync) 465static int fuse_fsync(struct file *file, loff_t start, loff_t end,
466 int datasync)
455{ 467{
456 return fuse_fsync_common(file, datasync, 0); 468 return fuse_fsync_common(file, start, end, datasync, 0);
457} 469}
458 470
459void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, 471void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -1600,15 +1612,32 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1600 struct inode *inode = file->f_path.dentry->d_inode; 1612 struct inode *inode = file->f_path.dentry->d_inode;
1601 1613
1602 mutex_lock(&inode->i_mutex); 1614 mutex_lock(&inode->i_mutex);
1603 switch (origin) { 1615 if (origin != SEEK_CUR || origin != SEEK_SET) {
1604 case SEEK_END:
1605 retval = fuse_update_attributes(inode, NULL, file, NULL); 1616 retval = fuse_update_attributes(inode, NULL, file, NULL);
1606 if (retval) 1617 if (retval)
1607 goto exit; 1618 goto exit;
1619 }
1620
1621 switch (origin) {
1622 case SEEK_END:
1608 offset += i_size_read(inode); 1623 offset += i_size_read(inode);
1609 break; 1624 break;
1610 case SEEK_CUR: 1625 case SEEK_CUR:
1611 offset += file->f_pos; 1626 offset += file->f_pos;
1627 break;
1628 case SEEK_DATA:
1629 if (offset >= i_size_read(inode)) {
1630 retval = -ENXIO;
1631 goto exit;
1632 }
1633 break;
1634 case SEEK_HOLE:
1635 if (offset >= i_size_read(inode)) {
1636 retval = -ENXIO;
1637 goto exit;
1638 }
1639 offset = i_size_read(inode);
1640 break;
1612 } 1641 }
1613 retval = -EINVAL; 1642 retval = -EINVAL;
1614 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 1643 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b788becada76..c6aa2d4b8517 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -589,7 +589,8 @@ void fuse_release_common(struct file *file, int opcode);
589/** 589/**
590 * Send FSYNC or FSYNCDIR request 590 * Send FSYNC or FSYNCDIR request
591 */ 591 */
592int fuse_fsync_common(struct file *file, int datasync, int isdir); 592int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
593 int datasync, int isdir);
593 594
594/** 595/**
595 * Notify poll wakeup 596 * Notify poll wakeup
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 8f26d1a58912..70e90b4974ce 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,9 +190,9 @@ generic_acl_chmod(struct inode *inode)
190} 190}
191 191
192int 192int
193generic_check_acl(struct inode *inode, int mask, unsigned int flags) 193generic_check_acl(struct inode *inode, int mask)
194{ 194{
195 if (flags & IPERM_FLAG_RCU) { 195 if (mask & MAY_NOT_BLOCK) {
196 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 196 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
197 return -ECHILD; 197 return -ECHILD;
198 } else { 198 } else {
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index cbc07155b1a0..8ef1079f1665 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,12 +75,12 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
75 * Returns: errno 75 * Returns: errno
76 */ 76 */
77 77
78int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags) 78int gfs2_check_acl(struct inode *inode, int mask)
79{ 79{
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU) { 83 if (mask & MAY_NOT_BLOCK) {
84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
85 return -ECHILD; 85 return -ECHILD;
86 return -EAGAIN; 86 return -EAGAIN;
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index a93907c8159b..b522b0cb39ea 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" 16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
17#define GFS2_ACL_MAX_ENTRIES 25 17#define GFS2_ACL_MAX_ENTRIES 25
18 18
19extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int); 19extern int gfs2_check_acl(struct inode *inode, int mask);
20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
22extern const struct xattr_handler gfs2_xattr_system_handler; 22extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 42e477f31223..7878c473ae62 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1216,6 +1216,8 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1216 if (ret) 1216 if (ret)
1217 return ret; 1217 return ret;
1218 1218
1219 inode_dio_wait(inode);
1220
1219 oldsize = inode->i_size; 1221 oldsize = inode->i_size;
1220 if (newsize >= oldsize) 1222 if (newsize >= oldsize)
1221 return do_grow(inode, newsize); 1223 return do_grow(inode, newsize);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index bc2590ef5fc1..edeb9e802903 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -245,7 +245,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
245 !capable(CAP_LINUX_IMMUTABLE)) 245 !capable(CAP_LINUX_IMMUTABLE))
246 goto out; 246 goto out;
247 if (!IS_IMMUTABLE(inode)) { 247 if (!IS_IMMUTABLE(inode)) {
248 error = gfs2_permission(inode, MAY_WRITE, 0); 248 error = gfs2_permission(inode, MAY_WRITE);
249 if (error) 249 if (error)
250 goto out; 250 goto out;
251 } 251 }
@@ -546,7 +546,9 @@ static int gfs2_close(struct inode *inode, struct file *file)
546 546
547/** 547/**
548 * gfs2_fsync - sync the dirty data for a file (across the cluster) 548 * gfs2_fsync - sync the dirty data for a file (across the cluster)
549 * @file: the file that points to the dentry (we ignore this) 549 * @file: the file that points to the dentry
550 * @start: the start position in the file to sync
551 * @end: the end position in the file to sync
550 * @datasync: set if we can ignore timestamp changes 552 * @datasync: set if we can ignore timestamp changes
551 * 553 *
552 * The VFS will flush data for us. We only need to worry 554 * The VFS will flush data for us. We only need to worry
@@ -555,23 +557,32 @@ static int gfs2_close(struct inode *inode, struct file *file)
555 * Returns: errno 557 * Returns: errno
556 */ 558 */
557 559
558static int gfs2_fsync(struct file *file, int datasync) 560static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
561 int datasync)
559{ 562{
560 struct inode *inode = file->f_mapping->host; 563 struct inode *inode = file->f_mapping->host;
561 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 564 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
562 struct gfs2_inode *ip = GFS2_I(inode); 565 struct gfs2_inode *ip = GFS2_I(inode);
563 int ret; 566 int ret;
564 567
568 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
569 if (ret)
570 return ret;
571 mutex_lock(&inode->i_mutex);
572
565 if (datasync) 573 if (datasync)
566 sync_state &= ~I_DIRTY_SYNC; 574 sync_state &= ~I_DIRTY_SYNC;
567 575
568 if (sync_state) { 576 if (sync_state) {
569 ret = sync_inode_metadata(inode, 1); 577 ret = sync_inode_metadata(inode, 1);
570 if (ret) 578 if (ret) {
579 mutex_unlock(&inode->i_mutex);
571 return ret; 580 return ret;
581 }
572 gfs2_ail_flush(ip->i_gl); 582 gfs2_ail_flush(ip->i_gl);
573 } 583 }
574 584
585 mutex_unlock(&inode->i_mutex);
575 return 0; 586 return 0;
576} 587}
577 588
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 03e0c529063e..0fb51a96eff0 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -307,7 +307,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
307 } 307 }
308 308
309 if (!is_root) { 309 if (!is_root) {
310 error = gfs2_permission(dir, MAY_EXEC, 0); 310 error = gfs2_permission(dir, MAY_EXEC);
311 if (error) 311 if (error)
312 goto out; 312 goto out;
313 } 313 }
@@ -337,7 +337,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
337{ 337{
338 int error; 338 int error;
339 339
340 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); 340 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
341 if (error) 341 if (error)
342 return error; 342 return error;
343 343
@@ -792,13 +792,8 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, 792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
793 struct nameidata *nd) 793 struct nameidata *nd)
794{ 794{
795 struct inode *inode = NULL; 795 struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
796 796 if (inode && !IS_ERR(inode)) {
797 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
798 if (inode && IS_ERR(inode))
799 return ERR_CAST(inode);
800
801 if (inode) {
802 struct gfs2_glock *gl = GFS2_I(inode)->i_gl; 797 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
803 struct gfs2_holder gh; 798 struct gfs2_holder gh;
804 int error; 799 int error;
@@ -808,11 +803,8 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
808 return ERR_PTR(error); 803 return ERR_PTR(error);
809 } 804 }
810 gfs2_glock_dq_uninit(&gh); 805 gfs2_glock_dq_uninit(&gh);
811 return d_splice_alias(inode, dentry);
812 } 806 }
813 d_add(dentry, inode); 807 return d_splice_alias(inode, dentry);
814
815 return NULL;
816} 808}
817 809
818/** 810/**
@@ -857,7 +849,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
857 if (inode->i_nlink == 0) 849 if (inode->i_nlink == 0)
858 goto out_gunlock; 850 goto out_gunlock;
859 851
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0); 852 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
861 if (error) 853 if (error)
862 goto out_gunlock; 854 goto out_gunlock;
863 855
@@ -990,7 +982,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
990 if (IS_APPEND(&dip->i_inode)) 982 if (IS_APPEND(&dip->i_inode))
991 return -EPERM; 983 return -EPERM;
992 984
993 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); 985 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
994 if (error) 986 if (error)
995 return error; 987 return error;
996 988
@@ -1336,7 +1328,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1336 } 1328 }
1337 } 1329 }
1338 } else { 1330 } else {
1339 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0); 1331 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
1340 if (error) 1332 if (error)
1341 goto out_gunlock; 1333 goto out_gunlock;
1342 1334
@@ -1371,7 +1363,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1371 /* Check out the dir to be renamed */ 1363 /* Check out the dir to be renamed */
1372 1364
1373 if (dir_rename) { 1365 if (dir_rename) {
1374 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0); 1366 error = gfs2_permission(odentry->d_inode, MAY_WRITE);
1375 if (error) 1367 if (error)
1376 goto out_gunlock; 1368 goto out_gunlock;
1377 } 1369 }
@@ -1543,7 +1535,7 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1543 * Returns: errno 1535 * Returns: errno
1544 */ 1536 */
1545 1537
1546int gfs2_permission(struct inode *inode, int mask, unsigned int flags) 1538int gfs2_permission(struct inode *inode, int mask)
1547{ 1539{
1548 struct gfs2_inode *ip; 1540 struct gfs2_inode *ip;
1549 struct gfs2_holder i_gh; 1541 struct gfs2_holder i_gh;
@@ -1553,7 +1545,7 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1553 1545
1554 ip = GFS2_I(inode); 1546 ip = GFS2_I(inode);
1555 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1547 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1556 if (flags & IPERM_FLAG_RCU) 1548 if (mask & MAY_NOT_BLOCK)
1557 return -ECHILD; 1549 return -ECHILD;
1558 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1550 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1559 if (error) 1551 if (error)
@@ -1564,7 +1556,7 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1564 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) 1556 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1565 error = -EACCES; 1557 error = -EACCES;
1566 else 1558 else
1567 error = generic_permission(inode, mask, flags, gfs2_check_acl); 1559 error = generic_permission(inode, mask);
1568 if (unlock) 1560 if (unlock)
1569 gfs2_glock_dq_uninit(&i_gh); 1561 gfs2_glock_dq_uninit(&i_gh);
1570 1562
@@ -1854,6 +1846,7 @@ const struct inode_operations gfs2_file_iops = {
1854 .listxattr = gfs2_listxattr, 1846 .listxattr = gfs2_listxattr,
1855 .removexattr = gfs2_removexattr, 1847 .removexattr = gfs2_removexattr,
1856 .fiemap = gfs2_fiemap, 1848 .fiemap = gfs2_fiemap,
1849 .check_acl = gfs2_check_acl,
1857}; 1850};
1858 1851
1859const struct inode_operations gfs2_dir_iops = { 1852const struct inode_operations gfs2_dir_iops = {
@@ -1874,6 +1867,7 @@ const struct inode_operations gfs2_dir_iops = {
1874 .listxattr = gfs2_listxattr, 1867 .listxattr = gfs2_listxattr,
1875 .removexattr = gfs2_removexattr, 1868 .removexattr = gfs2_removexattr,
1876 .fiemap = gfs2_fiemap, 1869 .fiemap = gfs2_fiemap,
1870 .check_acl = gfs2_check_acl,
1877}; 1871};
1878 1872
1879const struct inode_operations gfs2_symlink_iops = { 1873const struct inode_operations gfs2_symlink_iops = {
@@ -1888,5 +1882,6 @@ const struct inode_operations gfs2_symlink_iops = {
1888 .listxattr = gfs2_listxattr, 1882 .listxattr = gfs2_listxattr,
1889 .removexattr = gfs2_removexattr, 1883 .removexattr = gfs2_removexattr,
1890 .fiemap = gfs2_fiemap, 1884 .fiemap = gfs2_fiemap,
1885 .check_acl = gfs2_check_acl,
1891}; 1886};
1892 1887
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 31606076f701..8d90e0c07672 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -108,7 +108,7 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip);
108 108
109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
110 int is_root); 110 int is_root);
111extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); 111extern int gfs2_permission(struct inode *inode, int mask);
112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index fff16c968e67..96a1b625fc74 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -123,8 +123,8 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
123 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 123 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
124 ssize_t ret; 124 ssize_t ret;
125 125
126 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 126 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
127 offset, nr_segs, hfs_get_block, NULL); 127 hfs_get_block);
128 128
129 /* 129 /*
130 * In case of error extending write may have instantiated a few 130 * In case of error extending write may have instantiated a few
@@ -615,6 +615,8 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
615 615
616 if ((attr->ia_valid & ATTR_SIZE) && 616 if ((attr->ia_valid & ATTR_SIZE) &&
617 attr->ia_size != i_size_read(inode)) { 617 attr->ia_size != i_size_read(inode)) {
618 inode_dio_wait(inode);
619
618 error = vmtruncate(inode, attr->ia_size); 620 error = vmtruncate(inode, attr->ia_size);
619 if (error) 621 if (error)
620 return error; 622 return error;
@@ -625,12 +627,18 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
625 return 0; 627 return 0;
626} 628}
627 629
628static int hfs_file_fsync(struct file *filp, int datasync) 630static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
631 int datasync)
629{ 632{
630 struct inode *inode = filp->f_mapping->host; 633 struct inode *inode = filp->f_mapping->host;
631 struct super_block * sb; 634 struct super_block * sb;
632 int ret, err; 635 int ret, err;
633 636
637 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
638 if (ret)
639 return ret;
640 mutex_lock(&inode->i_mutex);
641
634 /* sync the inode to buffers */ 642 /* sync the inode to buffers */
635 ret = write_inode_now(inode, 0); 643 ret = write_inode_now(inode, 0);
636 644
@@ -647,6 +655,7 @@ static int hfs_file_fsync(struct file *filp, int datasync)
647 err = sync_blockdev(sb->s_bdev); 655 err = sync_blockdev(sb->s_bdev);
648 if (!ret) 656 if (!ret)
649 ret = err; 657 ret = err;
658 mutex_unlock(&inode->i_mutex);
650 return ret; 659 return ret;
651} 660}
652 661
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 81dfd1e495e3..d7674d051f52 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -404,7 +404,8 @@ int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
404int hfsplus_cat_write_inode(struct inode *); 404int hfsplus_cat_write_inode(struct inode *);
405struct inode *hfsplus_new_inode(struct super_block *, int); 405struct inode *hfsplus_new_inode(struct super_block *, int);
406void hfsplus_delete_inode(struct inode *); 406void hfsplus_delete_inode(struct inode *);
407int hfsplus_file_fsync(struct file *file, int datasync); 407int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
408 int datasync);
408 409
409/* ioctl.c */ 410/* ioctl.c */
410long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 411long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 010cd363d085..4cc1e3a36ec7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -119,8 +119,8 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
119 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 119 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
120 ssize_t ret; 120 ssize_t ret;
121 121
122 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 122 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
123 offset, nr_segs, hfsplus_get_block, NULL); 123 hfsplus_get_block);
124 124
125 /* 125 /*
126 * In case of error extending write may have instantiated a few 126 * In case of error extending write may have instantiated a few
@@ -298,6 +298,8 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
298 298
299 if ((attr->ia_valid & ATTR_SIZE) && 299 if ((attr->ia_valid & ATTR_SIZE) &&
300 attr->ia_size != i_size_read(inode)) { 300 attr->ia_size != i_size_read(inode)) {
301 inode_dio_wait(inode);
302
301 error = vmtruncate(inode, attr->ia_size); 303 error = vmtruncate(inode, attr->ia_size);
302 if (error) 304 if (error)
303 return error; 305 return error;
@@ -308,13 +310,19 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
308 return 0; 310 return 0;
309} 311}
310 312
311int hfsplus_file_fsync(struct file *file, int datasync) 313int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
314 int datasync)
312{ 315{
313 struct inode *inode = file->f_mapping->host; 316 struct inode *inode = file->f_mapping->host;
314 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 317 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
315 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); 318 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
316 int error = 0, error2; 319 int error = 0, error2;
317 320
321 error = filemap_write_and_wait_range(inode->i_mapping, start, end);
322 if (error)
323 return error;
324 mutex_lock(&inode->i_mutex);
325
318 /* 326 /*
319 * Sync inode metadata into the catalog and extent trees. 327 * Sync inode metadata into the catalog and extent trees.
320 */ 328 */
@@ -342,6 +350,8 @@ int hfsplus_file_fsync(struct file *file, int datasync)
342 if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) 350 if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
343 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 351 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
344 352
353 mutex_unlock(&inode->i_mutex);
354
345 return error; 355 return error;
346} 356}
347 357
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2638c834ed28..0d22afdd4611 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -362,9 +362,20 @@ retry:
362 return 0; 362 return 0;
363} 363}
364 364
365int hostfs_fsync(struct file *file, int datasync) 365int hostfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
366{ 366{
367 return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync); 367 struct inode *inode = file->f_mapping->host;
368 int ret;
369
370 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
371 if (ret)
372 return ret;
373
374 mutex_lock(&inode->i_mutex);
375 ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
376 mutex_unlock(&inode->i_mutex);
377
378 return ret;
368} 379}
369 380
370static const struct file_operations hostfs_file_fops = { 381static const struct file_operations hostfs_file_fops = {
@@ -748,12 +759,12 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
748 return err; 759 return err;
749} 760}
750 761
751int hostfs_permission(struct inode *ino, int desired, unsigned int flags) 762int hostfs_permission(struct inode *ino, int desired)
752{ 763{
753 char *name; 764 char *name;
754 int r = 0, w = 0, x = 0, err; 765 int r = 0, w = 0, x = 0, err;
755 766
756 if (flags & IPERM_FLAG_RCU) 767 if (desired & MAY_NOT_BLOCK)
757 return -ECHILD; 768 return -ECHILD;
758 769
759 if (desired & MAY_READ) r = 1; 770 if (desired & MAY_READ) r = 1;
@@ -770,7 +781,7 @@ int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
770 err = access_file(name, r, w, x); 781 err = access_file(name, r, w, x);
771 __putname(name); 782 __putname(name);
772 if (!err) 783 if (!err)
773 err = generic_permission(ino, desired, flags, NULL); 784 err = generic_permission(ino, desired);
774 return err; 785 return err;
775} 786}
776 787
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index f46ae025bfb5..96a8ed91cedd 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -29,6 +29,10 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
30 struct super_block *s = i->i_sb; 30 struct super_block *s = i->i_sb;
31 31
32 /* Somebody else will have to figure out what to do here */
33 if (whence == SEEK_DATA || whence == SEEK_HOLE)
34 return -EINVAL;
35
32 hpfs_lock(s); 36 hpfs_lock(s);
33 37
34 /*printk("dir lseek\n");*/ 38 /*printk("dir lseek\n");*/
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 89c500ee5213..89d2a5803ae3 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -18,9 +18,14 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
18 return 0; 18 return 0;
19} 19}
20 20
21int hpfs_file_fsync(struct file *file, int datasync) 21int hpfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
22{ 22{
23 struct inode *inode = file->f_mapping->host; 23 struct inode *inode = file->f_mapping->host;
24 int ret;
25
26 ret = filemap_write_and_wait_range(file->f_mapping, start, end);
27 if (ret)
28 return ret;
24 return sync_blockdev(inode->i_sb->s_bdev); 29 return sync_blockdev(inode->i_sb->s_bdev);
25} 30}
26 31
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index dd552f862c8f..331b5e234ef3 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -258,7 +258,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *,
258 258
259/* file.c */ 259/* file.c */
260 260
261int hpfs_file_fsync(struct file *, int); 261int hpfs_file_fsync(struct file *, loff_t, loff_t, int);
262extern const struct file_operations hpfs_file_ops; 262extern const struct file_operations hpfs_file_ops;
263extern const struct inode_operations hpfs_file_iops; 263extern const struct inode_operations hpfs_file_iops;
264extern const struct address_space_operations hpfs_aops; 264extern const struct address_space_operations hpfs_aops;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index acf95dab2aac..2df69e2f07cf 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -398,7 +398,7 @@ again:
398 hpfs_unlock(dir->i_sb); 398 hpfs_unlock(dir->i_sb);
399 return -ENOSPC; 399 return -ENOSPC;
400 } 400 }
401 if (generic_permission(inode, MAY_WRITE, 0, NULL) || 401 if (generic_permission(inode, MAY_WRITE) ||
402 !S_ISREG(inode->i_mode) || 402 !S_ISREG(inode->i_mode) ||
403 get_write_access(inode)) { 403 get_write_access(inode)) {
404 d_rehash(dentry); 404 d_rehash(dentry);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 85c098a499f3..8635be5ffd97 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -573,9 +573,10 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
573 return err; 573 return err;
574} 574}
575 575
576static int hppfs_fsync(struct file *file, int datasync) 576static int hppfs_fsync(struct file *file, loff_t start, loff_t end,
577 int datasync)
577{ 578{
578 return 0; 579 return filemap_write_and_wait_range(file->f_mapping, start, end);
579} 580}
580 581
581static const struct file_operations hppfs_dir_fops = { 582static const struct file_operations hppfs_dir_fops = {
diff --git a/fs/inode.c b/fs/inode.c
index 43566d17d1b8..96c77b81167c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -33,8 +33,8 @@
33 * 33 *
34 * inode->i_lock protects: 34 * inode->i_lock protects:
35 * inode->i_state, inode->i_hash, __iget() 35 * inode->i_state, inode->i_hash, __iget()
36 * inode_lru_lock protects: 36 * inode->i_sb->s_inode_lru_lock protects:
37 * inode_lru, inode->i_lru 37 * inode->i_sb->s_inode_lru, inode->i_lru
38 * inode_sb_list_lock protects: 38 * inode_sb_list_lock protects:
39 * sb->s_inodes, inode->i_sb_list 39 * sb->s_inodes, inode->i_sb_list
40 * inode_wb_list_lock protects: 40 * inode_wb_list_lock protects:
@@ -46,7 +46,7 @@
46 * 46 *
47 * inode_sb_list_lock 47 * inode_sb_list_lock
48 * inode->i_lock 48 * inode->i_lock
49 * inode_lru_lock 49 * inode->i_sb->s_inode_lru_lock
50 * 50 *
51 * inode_wb_list_lock 51 * inode_wb_list_lock
52 * inode->i_lock 52 * inode->i_lock
@@ -64,24 +64,10 @@ static unsigned int i_hash_shift __read_mostly;
64static struct hlist_head *inode_hashtable __read_mostly; 64static struct hlist_head *inode_hashtable __read_mostly;
65static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); 65static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
66 66
67static LIST_HEAD(inode_lru);
68static DEFINE_SPINLOCK(inode_lru_lock);
69
70__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); 67__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
71__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); 68__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
72 69
73/* 70/*
74 * iprune_sem provides exclusion between the icache shrinking and the
75 * umount path.
76 *
77 * We don't actually need it to protect anything in the umount path,
78 * but only need to cycle through it to make sure any inode that
79 * prune_icache took off the LRU list has been fully torn down by the
80 * time we are past evict_inodes.
81 */
82static DECLARE_RWSEM(iprune_sem);
83
84/*
85 * Empty aops. Can be used for the cases where the user does not 71 * Empty aops. Can be used for the cases where the user does not
86 * define any of the address_space operations. 72 * define any of the address_space operations.
87 */ 73 */
@@ -95,6 +81,7 @@ EXPORT_SYMBOL(empty_aops);
95struct inodes_stat_t inodes_stat; 81struct inodes_stat_t inodes_stat;
96 82
97static DEFINE_PER_CPU(unsigned int, nr_inodes); 83static DEFINE_PER_CPU(unsigned int, nr_inodes);
84static DEFINE_PER_CPU(unsigned int, nr_unused);
98 85
99static struct kmem_cache *inode_cachep __read_mostly; 86static struct kmem_cache *inode_cachep __read_mostly;
100 87
@@ -109,7 +96,11 @@ static int get_nr_inodes(void)
109 96
110static inline int get_nr_inodes_unused(void) 97static inline int get_nr_inodes_unused(void)
111{ 98{
112 return inodes_stat.nr_unused; 99 int i;
100 int sum = 0;
101 for_each_possible_cpu(i)
102 sum += per_cpu(nr_unused, i);
103 return sum < 0 ? 0 : sum;
113} 104}
114 105
115int get_nr_dirty_inodes(void) 106int get_nr_dirty_inodes(void)
@@ -127,6 +118,7 @@ int proc_nr_inodes(ctl_table *table, int write,
127 void __user *buffer, size_t *lenp, loff_t *ppos) 118 void __user *buffer, size_t *lenp, loff_t *ppos)
128{ 119{
129 inodes_stat.nr_inodes = get_nr_inodes(); 120 inodes_stat.nr_inodes = get_nr_inodes();
121 inodes_stat.nr_unused = get_nr_inodes_unused();
130 return proc_dointvec(table, write, buffer, lenp, ppos); 122 return proc_dointvec(table, write, buffer, lenp, ppos);
131} 123}
132#endif 124#endif
@@ -176,8 +168,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
176 mutex_init(&inode->i_mutex); 168 mutex_init(&inode->i_mutex);
177 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); 169 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
178 170
179 init_rwsem(&inode->i_alloc_sem); 171 atomic_set(&inode->i_dio_count, 0);
180 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
181 172
182 mapping->a_ops = &empty_aops; 173 mapping->a_ops = &empty_aops;
183 mapping->host = inode; 174 mapping->host = inode;
@@ -337,22 +328,24 @@ EXPORT_SYMBOL(ihold);
337 328
338static void inode_lru_list_add(struct inode *inode) 329static void inode_lru_list_add(struct inode *inode)
339{ 330{
340 spin_lock(&inode_lru_lock); 331 spin_lock(&inode->i_sb->s_inode_lru_lock);
341 if (list_empty(&inode->i_lru)) { 332 if (list_empty(&inode->i_lru)) {
342 list_add(&inode->i_lru, &inode_lru); 333 list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
343 inodes_stat.nr_unused++; 334 inode->i_sb->s_nr_inodes_unused++;
335 this_cpu_inc(nr_unused);
344 } 336 }
345 spin_unlock(&inode_lru_lock); 337 spin_unlock(&inode->i_sb->s_inode_lru_lock);
346} 338}
347 339
348static void inode_lru_list_del(struct inode *inode) 340static void inode_lru_list_del(struct inode *inode)
349{ 341{
350 spin_lock(&inode_lru_lock); 342 spin_lock(&inode->i_sb->s_inode_lru_lock);
351 if (!list_empty(&inode->i_lru)) { 343 if (!list_empty(&inode->i_lru)) {
352 list_del_init(&inode->i_lru); 344 list_del_init(&inode->i_lru);
353 inodes_stat.nr_unused--; 345 inode->i_sb->s_nr_inodes_unused--;
346 this_cpu_dec(nr_unused);
354 } 347 }
355 spin_unlock(&inode_lru_lock); 348 spin_unlock(&inode->i_sb->s_inode_lru_lock);
356} 349}
357 350
358/** 351/**
@@ -537,14 +530,6 @@ void evict_inodes(struct super_block *sb)
537 spin_unlock(&inode_sb_list_lock); 530 spin_unlock(&inode_sb_list_lock);
538 531
539 dispose_list(&dispose); 532 dispose_list(&dispose);
540
541 /*
542 * Cycle through iprune_sem to make sure any inode that prune_icache
543 * moved off the list before we took the lock has been fully torn
544 * down.
545 */
546 down_write(&iprune_sem);
547 up_write(&iprune_sem);
548} 533}
549 534
550/** 535/**
@@ -607,8 +592,10 @@ static int can_unuse(struct inode *inode)
607} 592}
608 593
609/* 594/*
610 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a 595 * Walk the superblock inode LRU for freeable inodes and attempt to free them.
611 * temporary list and then are freed outside inode_lru_lock by dispose_list(). 596 * This is called from the superblock shrinker function with a number of inodes
597 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
598 * then are freed outside inode_lock by dispose_list().
612 * 599 *
613 * Any inodes which are pinned purely because of attached pagecache have their 600 * Any inodes which are pinned purely because of attached pagecache have their
614 * pagecache removed. If the inode has metadata buffers attached to 601 * pagecache removed. If the inode has metadata buffers attached to
@@ -622,29 +609,28 @@ static int can_unuse(struct inode *inode)
622 * LRU does not have strict ordering. Hence we don't want to reclaim inodes 609 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
623 * with this flag set because they are the inodes that are out of order. 610 * with this flag set because they are the inodes that are out of order.
624 */ 611 */
625static void prune_icache(int nr_to_scan) 612void prune_icache_sb(struct super_block *sb, int nr_to_scan)
626{ 613{
627 LIST_HEAD(freeable); 614 LIST_HEAD(freeable);
628 int nr_scanned; 615 int nr_scanned;
629 unsigned long reap = 0; 616 unsigned long reap = 0;
630 617
631 down_read(&iprune_sem); 618 spin_lock(&sb->s_inode_lru_lock);
632 spin_lock(&inode_lru_lock); 619 for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
633 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
634 struct inode *inode; 620 struct inode *inode;
635 621
636 if (list_empty(&inode_lru)) 622 if (list_empty(&sb->s_inode_lru))
637 break; 623 break;
638 624
639 inode = list_entry(inode_lru.prev, struct inode, i_lru); 625 inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
640 626
641 /* 627 /*
642 * we are inverting the inode_lru_lock/inode->i_lock here, 628 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
643 * so use a trylock. If we fail to get the lock, just move the 629 * so use a trylock. If we fail to get the lock, just move the
644 * inode to the back of the list so we don't spin on it. 630 * inode to the back of the list so we don't spin on it.
645 */ 631 */
646 if (!spin_trylock(&inode->i_lock)) { 632 if (!spin_trylock(&inode->i_lock)) {
647 list_move(&inode->i_lru, &inode_lru); 633 list_move(&inode->i_lru, &sb->s_inode_lru);
648 continue; 634 continue;
649 } 635 }
650 636
@@ -656,28 +642,29 @@ static void prune_icache(int nr_to_scan)
656 (inode->i_state & ~I_REFERENCED)) { 642 (inode->i_state & ~I_REFERENCED)) {
657 list_del_init(&inode->i_lru); 643 list_del_init(&inode->i_lru);
658 spin_unlock(&inode->i_lock); 644 spin_unlock(&inode->i_lock);
659 inodes_stat.nr_unused--; 645 sb->s_nr_inodes_unused--;
646 this_cpu_dec(nr_unused);
660 continue; 647 continue;
661 } 648 }
662 649
663 /* recently referenced inodes get one more pass */ 650 /* recently referenced inodes get one more pass */
664 if (inode->i_state & I_REFERENCED) { 651 if (inode->i_state & I_REFERENCED) {
665 inode->i_state &= ~I_REFERENCED; 652 inode->i_state &= ~I_REFERENCED;
666 list_move(&inode->i_lru, &inode_lru); 653 list_move(&inode->i_lru, &sb->s_inode_lru);
667 spin_unlock(&inode->i_lock); 654 spin_unlock(&inode->i_lock);
668 continue; 655 continue;
669 } 656 }
670 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 657 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
671 __iget(inode); 658 __iget(inode);
672 spin_unlock(&inode->i_lock); 659 spin_unlock(&inode->i_lock);
673 spin_unlock(&inode_lru_lock); 660 spin_unlock(&sb->s_inode_lru_lock);
674 if (remove_inode_buffers(inode)) 661 if (remove_inode_buffers(inode))
675 reap += invalidate_mapping_pages(&inode->i_data, 662 reap += invalidate_mapping_pages(&inode->i_data,
676 0, -1); 663 0, -1);
677 iput(inode); 664 iput(inode);
678 spin_lock(&inode_lru_lock); 665 spin_lock(&sb->s_inode_lru_lock);
679 666
680 if (inode != list_entry(inode_lru.next, 667 if (inode != list_entry(sb->s_inode_lru.next,
681 struct inode, i_lru)) 668 struct inode, i_lru))
682 continue; /* wrong inode or list_empty */ 669 continue; /* wrong inode or list_empty */
683 /* avoid lock inversions with trylock */ 670 /* avoid lock inversions with trylock */
@@ -693,51 +680,18 @@ static void prune_icache(int nr_to_scan)
693 spin_unlock(&inode->i_lock); 680 spin_unlock(&inode->i_lock);
694 681
695 list_move(&inode->i_lru, &freeable); 682 list_move(&inode->i_lru, &freeable);
696 inodes_stat.nr_unused--; 683 sb->s_nr_inodes_unused--;
684 this_cpu_dec(nr_unused);
697 } 685 }
698 if (current_is_kswapd()) 686 if (current_is_kswapd())
699 __count_vm_events(KSWAPD_INODESTEAL, reap); 687 __count_vm_events(KSWAPD_INODESTEAL, reap);
700 else 688 else
701 __count_vm_events(PGINODESTEAL, reap); 689 __count_vm_events(PGINODESTEAL, reap);
702 spin_unlock(&inode_lru_lock); 690 spin_unlock(&sb->s_inode_lru_lock);
703 691
704 dispose_list(&freeable); 692 dispose_list(&freeable);
705 up_read(&iprune_sem);
706} 693}
707 694
708/*
709 * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
710 * "unused" means that no dentries are referring to the inodes: the files are
711 * not open and the dcache references to those inodes have already been
712 * reclaimed.
713 *
714 * This function is passed the number of inodes to scan, and it returns the
715 * total number of remaining possibly-reclaimable inodes.
716 */
717static int shrink_icache_memory(struct shrinker *shrink,
718 struct shrink_control *sc)
719{
720 int nr = sc->nr_to_scan;
721 gfp_t gfp_mask = sc->gfp_mask;
722
723 if (nr) {
724 /*
725 * Nasty deadlock avoidance. We may hold various FS locks,
726 * and we don't want to recurse into the FS that called us
727 * in clear_inode() and friends..
728 */
729 if (!(gfp_mask & __GFP_FS))
730 return -1;
731 prune_icache(nr);
732 }
733 return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
734}
735
736static struct shrinker icache_shrinker = {
737 .shrink = shrink_icache_memory,
738 .seeks = DEFAULT_SEEKS,
739};
740
741static void __wait_on_freeing_inode(struct inode *inode); 695static void __wait_on_freeing_inode(struct inode *inode);
742/* 696/*
743 * Called with the inode lock held. 697 * Called with the inode lock held.
@@ -1331,7 +1285,7 @@ static void iput_final(struct inode *inode)
1331 1285
1332 WARN_ON(inode->i_state & I_NEW); 1286 WARN_ON(inode->i_state & I_NEW);
1333 1287
1334 if (op && op->drop_inode) 1288 if (op->drop_inode)
1335 drop = op->drop_inode(inode); 1289 drop = op->drop_inode(inode);
1336 else 1290 else
1337 drop = generic_drop_inode(inode); 1291 drop = generic_drop_inode(inode);
@@ -1617,7 +1571,6 @@ void __init inode_init(void)
1617 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 1571 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
1618 SLAB_MEM_SPREAD), 1572 SLAB_MEM_SPREAD),
1619 init_once); 1573 init_once);
1620 register_shrinker(&icache_shrinker);
1621 1574
1622 /* Hash may have been set up in inode_init_early */ 1575 /* Hash may have been set up in inode_init_early */
1623 if (!hashdist) 1576 if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index b29c46e4e32f..fe327c20af83 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,6 +97,7 @@ extern struct file *get_empty_filp(void);
97 * super.c 97 * super.c
98 */ 98 */
99extern int do_remount_sb(struct super_block *, int, void *, int); 99extern int do_remount_sb(struct super_block *, int, void *, int);
100extern bool grab_super_passive(struct super_block *sb);
100extern void __put_super(struct super_block *sb); 101extern void __put_super(struct super_block *sb);
101extern void put_super(struct super_block *sb); 102extern void put_super(struct super_block *sb);
102extern struct dentry *mount_fs(struct file_system_type *, 103extern struct dentry *mount_fs(struct file_system_type *,
@@ -135,3 +136,8 @@ extern void inode_wb_list_del(struct inode *inode);
135extern int get_nr_dirty_inodes(void); 136extern int get_nr_dirty_inodes(void);
136extern void evict_inodes(struct super_block *); 137extern void evict_inodes(struct super_block *);
137extern int invalidate_inodes(struct super_block *, bool); 138extern int invalidate_inodes(struct super_block *, bool);
139
140/*
141 * dcache.c
142 */
143extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 0542b6eedf80..f20437c068a0 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -254,19 +254,16 @@ static int isofs_readdir(struct file *filp,
254 char *tmpname; 254 char *tmpname;
255 struct iso_directory_record *tmpde; 255 struct iso_directory_record *tmpde;
256 struct inode *inode = filp->f_path.dentry->d_inode; 256 struct inode *inode = filp->f_path.dentry->d_inode;
257 struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
258 257
259 tmpname = (char *)__get_free_page(GFP_KERNEL); 258 tmpname = (char *)__get_free_page(GFP_KERNEL);
260 if (tmpname == NULL) 259 if (tmpname == NULL)
261 return -ENOMEM; 260 return -ENOMEM;
262 261
263 mutex_lock(&sbi->s_mutex);
264 tmpde = (struct iso_directory_record *) (tmpname+1024); 262 tmpde = (struct iso_directory_record *) (tmpname+1024);
265 263
266 result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde); 264 result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde);
267 265
268 free_page((unsigned long) tmpname); 266 free_page((unsigned long) tmpname);
269 mutex_unlock(&sbi->s_mutex);
270 return result; 267 return result;
271} 268}
272 269
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index b3cc8586984e..a5d03672d04e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -863,7 +863,6 @@ root_found:
863 sbi->s_utf8 = opt.utf8; 863 sbi->s_utf8 = opt.utf8;
864 sbi->s_nocompress = opt.nocompress; 864 sbi->s_nocompress = opt.nocompress;
865 sbi->s_overriderockperm = opt.overriderockperm; 865 sbi->s_overriderockperm = opt.overriderockperm;
866 mutex_init(&sbi->s_mutex);
867 /* 866 /*
868 * It would be incredibly stupid to allow people to mark every file 867 * It would be incredibly stupid to allow people to mark every file
869 * on the disk as suid, so we merely allow them to set the default 868 * on the disk as suid, so we merely allow them to set the default
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 2882dc089f87..7d33de84f52a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -55,7 +55,6 @@ struct isofs_sb_info {
55 gid_t s_gid; 55 gid_t s_gid;
56 uid_t s_uid; 56 uid_t s_uid;
57 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
58 struct mutex s_mutex; /* replaces BKL, please remove if possible */
59}; 58};
60 59
61#define ISOFS_INVALID_MODE ((mode_t) -1) 60#define ISOFS_INVALID_MODE ((mode_t) -1)
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 4fb3e8074fd4..1e2946f2a69e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -168,7 +168,6 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
168 int found; 168 int found;
169 unsigned long uninitialized_var(block); 169 unsigned long uninitialized_var(block);
170 unsigned long uninitialized_var(offset); 170 unsigned long uninitialized_var(offset);
171 struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb);
172 struct inode *inode; 171 struct inode *inode;
173 struct page *page; 172 struct page *page;
174 173
@@ -176,21 +175,13 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
176 if (!page) 175 if (!page)
177 return ERR_PTR(-ENOMEM); 176 return ERR_PTR(-ENOMEM);
178 177
179 mutex_lock(&sbi->s_mutex);
180 found = isofs_find_entry(dir, dentry, 178 found = isofs_find_entry(dir, dentry,
181 &block, &offset, 179 &block, &offset,
182 page_address(page), 180 page_address(page),
183 1024 + page_address(page)); 181 1024 + page_address(page));
184 __free_page(page); 182 __free_page(page);
185 183
186 inode = NULL; 184 inode = found ? isofs_iget(dir->i_sb, block, offset) : NULL;
187 if (found) { 185
188 inode = isofs_iget(dir->i_sb, block, offset);
189 if (IS_ERR(inode)) {
190 mutex_unlock(&sbi->s_mutex);
191 return ERR_CAST(inode);
192 }
193 }
194 mutex_unlock(&sbi->s_mutex);
195 return d_splice_alias(inode, dentry); 186 return d_splice_alias(inode, dentry);
196} 187}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index f9cd04db6eab..1fbc7de88f50 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -678,7 +678,6 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
678 678
679 init_rock_state(&rs, inode); 679 init_rock_state(&rs, inode);
680 block = ei->i_iget5_block; 680 block = ei->i_iget5_block;
681 mutex_lock(&sbi->s_mutex);
682 bh = sb_bread(inode->i_sb, block); 681 bh = sb_bread(inode->i_sb, block);
683 if (!bh) 682 if (!bh)
684 goto out_noread; 683 goto out_noread;
@@ -748,7 +747,6 @@ repeat:
748 goto fail; 747 goto fail;
749 brelse(bh); 748 brelse(bh);
750 *rpnt = '\0'; 749 *rpnt = '\0';
751 mutex_unlock(&sbi->s_mutex);
752 SetPageUptodate(page); 750 SetPageUptodate(page);
753 kunmap(page); 751 kunmap(page);
754 unlock_page(page); 752 unlock_page(page);
@@ -765,7 +763,6 @@ out_bad_span:
765 printk("symlink spans iso9660 blocks\n"); 763 printk("symlink spans iso9660 blocks\n");
766fail: 764fail:
767 brelse(bh); 765 brelse(bh);
768 mutex_unlock(&sbi->s_mutex);
769error: 766error:
770 SetPageError(page); 767 SetPageError(page);
771 kunmap(page); 768 kunmap(page);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 828a0e1ea438..3675b3cdee89 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,12 +259,12 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
259 return rc; 259 return rc;
260} 260}
261 261
262int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags) 262int jffs2_check_acl(struct inode *inode, int mask)
263{ 263{
264 struct posix_acl *acl; 264 struct posix_acl *acl;
265 int rc; 265 int rc;
266 266
267 if (flags & IPERM_FLAG_RCU) 267 if (mask & MAY_NOT_BLOCK)
268 return -ECHILD; 268 return -ECHILD;
269 269
270 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); 270 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 3119f59253d3..5e42de8d9541 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_check_acl(struct inode *, int, unsigned int); 29extern int jffs2_check_acl(struct inode *, int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 4bca6a2e5c07..5f243cd63afc 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -102,10 +102,8 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
102 mutex_unlock(&dir_f->sem); 102 mutex_unlock(&dir_f->sem);
103 if (ino) { 103 if (ino) {
104 inode = jffs2_iget(dir_i->i_sb, ino); 104 inode = jffs2_iget(dir_i->i_sb, ino);
105 if (IS_ERR(inode)) { 105 if (IS_ERR(inode))
106 printk(KERN_WARNING "iget() failed for ino #%u\n", ino); 106 printk(KERN_WARNING "iget() failed for ino #%u\n", ino);
107 return ERR_CAST(inode);
108 }
109 } 107 }
110 108
111 return d_splice_alias(inode, target); 109 return d_splice_alias(inode, target);
@@ -822,7 +820,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
822 820
823 if (victim_f) { 821 if (victim_f) {
824 /* There was a victim. Kill it off nicely */ 822 /* There was a victim. Kill it off nicely */
825 drop_nlink(new_dentry->d_inode); 823 if (S_ISDIR(new_dentry->d_inode->i_mode))
824 clear_nlink(new_dentry->d_inode);
825 else
826 drop_nlink(new_dentry->d_inode);
826 /* Don't oops if the victim was a dirent pointing to an 827 /* Don't oops if the victim was a dirent pointing to an
827 inode which didn't exist. */ 828 inode which didn't exist. */
828 if (victim_f->inocache) { 829 if (victim_f->inocache) {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 1c0a08d711aa..3989f7e09f7f 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -27,13 +27,20 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
27 struct page **pagep, void **fsdata); 27 struct page **pagep, void **fsdata);
28static int jffs2_readpage (struct file *filp, struct page *pg); 28static int jffs2_readpage (struct file *filp, struct page *pg);
29 29
30int jffs2_fsync(struct file *filp, int datasync) 30int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
31{ 31{
32 struct inode *inode = filp->f_mapping->host; 32 struct inode *inode = filp->f_mapping->host;
33 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 33 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
34 int ret;
35
36 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
37 if (ret)
38 return ret;
34 39
40 mutex_lock(&inode->i_mutex);
35 /* Trigger GC to flush any pending writes for this inode */ 41 /* Trigger GC to flush any pending writes for this inode */
36 jffs2_flush_wbuf_gc(c, inode->i_ino); 42 jffs2_flush_wbuf_gc(c, inode->i_ino);
43 mutex_unlock(&inode->i_mutex);
37 44
38 return 0; 45 return 0;
39} 46}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 65c6c43ca482..9c252835e8e5 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations;
158extern const struct file_operations jffs2_file_operations; 158extern const struct file_operations jffs2_file_operations;
159extern const struct inode_operations jffs2_file_inode_operations; 159extern const struct inode_operations jffs2_file_inode_operations;
160extern const struct address_space_operations jffs2_file_address_operations; 160extern const struct address_space_operations jffs2_file_address_operations;
161int jffs2_fsync(struct file *, int); 161int jffs2_fsync(struct file *, loff_t, loff_t, int);
162int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); 162int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
163 163
164/* ioctl.c */ 164/* ioctl.c */
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e5de9422fa32..8a0a0666d5a6 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,11 +114,11 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117int jfs_check_acl(struct inode *inode, int mask, unsigned int flags) 117int jfs_check_acl(struct inode *inode, int mask)
118{ 118{
119 struct posix_acl *acl; 119 struct posix_acl *acl;
120 120
121 if (flags & IPERM_FLAG_RCU) 121 if (mask & MAY_NOT_BLOCK)
122 return -ECHILD; 122 return -ECHILD;
123 123
124 acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 124 acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 2f3f531f3606..7527855b5cc6 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -28,19 +28,26 @@
28#include "jfs_acl.h" 28#include "jfs_acl.h"
29#include "jfs_debug.h" 29#include "jfs_debug.h"
30 30
31int jfs_fsync(struct file *file, int datasync) 31int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
32{ 32{
33 struct inode *inode = file->f_mapping->host; 33 struct inode *inode = file->f_mapping->host;
34 int rc = 0; 34 int rc = 0;
35 35
36 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
37 if (rc)
38 return rc;
39
40 mutex_lock(&inode->i_mutex);
36 if (!(inode->i_state & I_DIRTY) || 41 if (!(inode->i_state & I_DIRTY) ||
37 (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { 42 (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
38 /* Make sure committed changes hit the disk */ 43 /* Make sure committed changes hit the disk */
39 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); 44 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
45 mutex_unlock(&inode->i_mutex);
40 return rc; 46 return rc;
41 } 47 }
42 48
43 rc |= jfs_commit_inode(inode, 1); 49 rc |= jfs_commit_inode(inode, 1);
50 mutex_unlock(&inode->i_mutex);
44 51
45 return rc ? -EIO : 0; 52 return rc ? -EIO : 0;
46} 53}
@@ -110,6 +117,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
110 117
111 if ((iattr->ia_valid & ATTR_SIZE) && 118 if ((iattr->ia_valid & ATTR_SIZE) &&
112 iattr->ia_size != i_size_read(inode)) { 119 iattr->ia_size != i_size_read(inode)) {
120 inode_dio_wait(inode);
121
113 rc = vmtruncate(inode, iattr->ia_size); 122 rc = vmtruncate(inode, iattr->ia_size);
114 if (rc) 123 if (rc)
115 return rc; 124 return rc;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 109655904bbc..77b69b27f825 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -329,8 +329,8 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
329 struct inode *inode = file->f_mapping->host; 329 struct inode *inode = file->f_mapping->host;
330 ssize_t ret; 330 ssize_t ret;
331 331
332 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 332 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
333 offset, nr_segs, jfs_get_block, NULL); 333 jfs_get_block);
334 334
335 /* 335 /*
336 * In case of error extending write may have instantiated a few 336 * In case of error extending write may have instantiated a few
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index f9285c4900fa..54e07559878d 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_check_acl(struct inode *, int, unsigned int flags); 23int jfs_check_acl(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_acl_chmod(struct inode *inode); 25int jfs_acl_chmod(struct inode *inode);
26 26
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index ec2fb8b945fc..9271cfe4a149 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -21,7 +21,7 @@
21struct fid; 21struct fid;
22 22
23extern struct inode *ialloc(struct inode *, umode_t); 23extern struct inode *ialloc(struct inode *, umode_t);
24extern int jfs_fsync(struct file *, int); 24extern int jfs_fsync(struct file *, loff_t, loff_t, int);
25extern long jfs_ioctl(struct file *, unsigned int, unsigned long); 25extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
26extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); 26extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
27extern struct inode *jfs_iget(struct super_block *, unsigned long); 27extern struct inode *jfs_iget(struct super_block *, unsigned long);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index eaaf2b511e89..03787ef6a118 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1456,34 +1456,23 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1456 ino_t inum; 1456 ino_t inum;
1457 struct inode *ip; 1457 struct inode *ip;
1458 struct component_name key; 1458 struct component_name key;
1459 const char *name = dentry->d_name.name;
1460 int len = dentry->d_name.len;
1461 int rc; 1459 int rc;
1462 1460
1463 jfs_info("jfs_lookup: name = %s", name); 1461 jfs_info("jfs_lookup: name = %s", dentry->d_name.name);
1464 1462
1465 if ((name[0] == '.') && (len == 1)) 1463 if ((rc = get_UCSname(&key, dentry)))
1466 inum = dip->i_ino; 1464 return ERR_PTR(rc);
1467 else if (strcmp(name, "..") == 0) 1465 rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP);
1468 inum = PARENT(dip); 1466 free_UCSname(&key);
1469 else { 1467 if (rc == -ENOENT) {
1470 if ((rc = get_UCSname(&key, dentry))) 1468 ip = NULL;
1471 return ERR_PTR(rc); 1469 } else if (rc) {
1472 rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); 1470 jfs_err("jfs_lookup: dtSearch returned %d", rc);
1473 free_UCSname(&key); 1471 ip = ERR_PTR(rc);
1474 if (rc == -ENOENT) { 1472 } else {
1475 d_add(dentry, NULL); 1473 ip = jfs_iget(dip->i_sb, inum);
1476 return NULL; 1474 if (IS_ERR(ip))
1477 } else if (rc) { 1475 jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
1478 jfs_err("jfs_lookup: dtSearch returned %d", rc);
1479 return ERR_PTR(rc);
1480 }
1481 }
1482
1483 ip = jfs_iget(dip->i_sb, inum);
1484 if (IS_ERR(ip)) {
1485 jfs_err("jfs_lookup: iget failed on inum %d", (uint) inum);
1486 return ERR_CAST(ip);
1487 } 1476 }
1488 1477
1489 return d_splice_alias(ip, dentry); 1478 return d_splice_alias(ip, dentry);
@@ -1597,8 +1586,6 @@ out:
1597 1586
1598static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1587static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1599{ 1588{
1600 if (nd && nd->flags & LOOKUP_RCU)
1601 return -ECHILD;
1602 /* 1589 /*
1603 * This is not negative dentry. Always valid. 1590 * This is not negative dentry. Always valid.
1604 * 1591 *
@@ -1624,10 +1611,8 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1624 * case sensitive name which is specified by user if this is 1611 * case sensitive name which is specified by user if this is
1625 * for creation. 1612 * for creation.
1626 */ 1613 */
1627 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 1614 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1628 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 1615 return 0;
1629 return 0;
1630 }
1631 return 1; 1616 return 1;
1632} 1617}
1633 1618
diff --git a/fs/libfs.c b/fs/libfs.c
index 275ca4749a2e..c18e9a1235b6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,8 @@
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18 18
19#include "internal.h"
20
19static inline int simple_positive(struct dentry *dentry) 21static inline int simple_positive(struct dentry *dentry)
20{ 22{
21 return dentry->d_inode && !d_unhashed(dentry); 23 return dentry->d_inode && !d_unhashed(dentry);
@@ -246,13 +248,11 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
246 root->i_ino = 1; 248 root->i_ino = 1;
247 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; 249 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
248 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; 250 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
249 dentry = d_alloc(NULL, &d_name); 251 dentry = __d_alloc(s, &d_name);
250 if (!dentry) { 252 if (!dentry) {
251 iput(root); 253 iput(root);
252 goto Enomem; 254 goto Enomem;
253 } 255 }
254 dentry->d_sb = s;
255 dentry->d_parent = dentry;
256 d_instantiate(dentry, root); 256 d_instantiate(dentry, root);
257 s->s_root = dentry; 257 s->s_root = dentry;
258 s->s_d_op = dops; 258 s->s_d_op = dops;
@@ -328,8 +328,10 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
328 328
329 if (new_dentry->d_inode) { 329 if (new_dentry->d_inode) {
330 simple_unlink(new_dir, new_dentry); 330 simple_unlink(new_dir, new_dentry);
331 if (they_are_dirs) 331 if (they_are_dirs) {
332 drop_nlink(new_dentry->d_inode);
332 drop_nlink(old_dir); 333 drop_nlink(old_dir);
334 }
333 } else if (they_are_dirs) { 335 } else if (they_are_dirs) {
334 drop_nlink(old_dir); 336 drop_nlink(old_dir);
335 inc_nlink(new_dir); 337 inc_nlink(new_dir);
@@ -905,21 +907,29 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent);
905 * filesystems which track all non-inode metadata in the buffers list 907 * filesystems which track all non-inode metadata in the buffers list
906 * hanging off the address_space structure. 908 * hanging off the address_space structure.
907 */ 909 */
908int generic_file_fsync(struct file *file, int datasync) 910int generic_file_fsync(struct file *file, loff_t start, loff_t end,
911 int datasync)
909{ 912{
910 struct inode *inode = file->f_mapping->host; 913 struct inode *inode = file->f_mapping->host;
911 int err; 914 int err;
912 int ret; 915 int ret;
913 916
917 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
918 if (err)
919 return err;
920
921 mutex_lock(&inode->i_mutex);
914 ret = sync_mapping_buffers(inode->i_mapping); 922 ret = sync_mapping_buffers(inode->i_mapping);
915 if (!(inode->i_state & I_DIRTY)) 923 if (!(inode->i_state & I_DIRTY))
916 return ret; 924 goto out;
917 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 925 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
918 return ret; 926 goto out;
919 927
920 err = sync_inode_metadata(inode, 1); 928 err = sync_inode_metadata(inode, 1);
921 if (ret == 0) 929 if (ret == 0)
922 ret = err; 930 ret = err;
931out:
932 mutex_unlock(&inode->i_mutex);
923 return ret; 933 return ret;
924} 934}
925EXPORT_SYMBOL(generic_file_fsync); 935EXPORT_SYMBOL(generic_file_fsync);
@@ -956,7 +966,7 @@ EXPORT_SYMBOL(generic_check_addressable);
956/* 966/*
957 * No-op implementation of ->fsync for in-memory filesystems. 967 * No-op implementation of ->fsync for in-memory filesystems.
958 */ 968 */
959int noop_fsync(struct file *file, int datasync) 969int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
960{ 970{
961 return 0; 971 return 0;
962} 972}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1afae26cf236..b3ff3d894165 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -371,11 +371,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
371 page_cache_release(page); 371 page_cache_release(page);
372 372
373 inode = logfs_iget(dir->i_sb, ino); 373 inode = logfs_iget(dir->i_sb, ino);
374 if (IS_ERR(inode)) { 374 if (IS_ERR(inode))
375 printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)\n", 375 printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)\n",
376 ino, dir->i_ino, index); 376 ino, dir->i_ino, index);
377 return ERR_CAST(inode);
378 }
379 return d_splice_alias(inode, dentry); 377 return d_splice_alias(inode, dentry);
380} 378}
381 379
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index c2ad7028def4..b548c87a86f1 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -219,11 +219,20 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
219 } 219 }
220} 220}
221 221
222int logfs_fsync(struct file *file, int datasync) 222int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
223{ 223{
224 struct super_block *sb = file->f_mapping->host->i_sb; 224 struct super_block *sb = file->f_mapping->host->i_sb;
225 struct inode *inode = file->f_mapping->host;
226 int ret;
227
228 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
229 if (ret)
230 return ret;
225 231
232 mutex_lock(&inode->i_mutex);
226 logfs_write_anchor(sb); 233 logfs_write_anchor(sb);
234 mutex_unlock(&inode->i_mutex);
235
227 return 0; 236 return 0;
228} 237}
229 238
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 57afd4a6fabb..f22d108bfa5d 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -506,7 +506,7 @@ extern const struct file_operations logfs_reg_fops;
506extern const struct address_space_operations logfs_reg_aops; 506extern const struct address_space_operations logfs_reg_aops;
507int logfs_readpage(struct file *file, struct page *page); 507int logfs_readpage(struct file *file, struct page *page);
508long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 508long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
509int logfs_fsync(struct file *file, int datasync); 509int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
510 510
511/* gc.c */ 511/* gc.c */
512u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec); 512u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index adcdc0a4e182..e7d23e25bf1d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -596,8 +596,7 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
596 596
597int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 597int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
598{ 598{
599 struct inode *dir = dentry->d_parent->d_inode; 599 struct super_block *sb = dentry->d_sb;
600 struct super_block *sb = dir->i_sb;
601 generic_fillattr(dentry->d_inode, stat); 600 generic_fillattr(dentry->d_inode, stat);
602 if (INODE_VERSION(dentry->d_inode) == MINIX_V1) 601 if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
603 stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb); 602 stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
diff --git a/fs/namei.c b/fs/namei.c
index 14ab8d3f2f0c..b7fad009bbf6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -176,12 +176,12 @@ EXPORT_SYMBOL(putname);
176/* 176/*
177 * This does basic POSIX ACL permission checking 177 * This does basic POSIX ACL permission checking
178 */ 178 */
179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, 179static int acl_permission_check(struct inode *inode, int mask)
180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
181{ 180{
181 int (*check_acl)(struct inode *inode, int mask);
182 unsigned int mode = inode->i_mode; 182 unsigned int mode = inode->i_mode;
183 183
184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK;
185 185
186 if (current_user_ns() != inode_userns(inode)) 186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms; 187 goto other_perms;
@@ -189,8 +189,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
189 if (current_fsuid() == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
190 mode >>= 6; 190 mode >>= 6;
191 else { 191 else {
192 check_acl = inode->i_op->check_acl;
192 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 193 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
193 int error = check_acl(inode, mask, flags); 194 int error = check_acl(inode, mask);
194 if (error != -EAGAIN) 195 if (error != -EAGAIN)
195 return error; 196 return error;
196 } 197 }
@@ -203,7 +204,7 @@ other_perms:
203 /* 204 /*
204 * If the DACs are ok we don't need any capability check. 205 * If the DACs are ok we don't need any capability check.
205 */ 206 */
206 if ((mask & ~mode) == 0) 207 if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
207 return 0; 208 return 0;
208 return -EACCES; 209 return -EACCES;
209} 210}
@@ -212,8 +213,6 @@ other_perms:
212 * generic_permission - check for access rights on a Posix-like filesystem 213 * generic_permission - check for access rights on a Posix-like filesystem
213 * @inode: inode to check access rights for 214 * @inode: inode to check access rights for
214 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 215 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
215 * @check_acl: optional callback to check for Posix ACLs
216 * @flags: IPERM_FLAG_ flags.
217 * 216 *
218 * Used to check for read/write/execute permissions on a file. 217 * Used to check for read/write/execute permissions on a file.
219 * We use "fsuid" for this, letting us set arbitrary permissions 218 * We use "fsuid" for this, letting us set arbitrary permissions
@@ -224,24 +223,32 @@ other_perms:
224 * request cannot be satisfied (eg. requires blocking or too much complexity). 223 * request cannot be satisfied (eg. requires blocking or too much complexity).
225 * It would then be called again in ref-walk mode. 224 * It would then be called again in ref-walk mode.
226 */ 225 */
227int generic_permission(struct inode *inode, int mask, unsigned int flags, 226int generic_permission(struct inode *inode, int mask)
228 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
229{ 227{
230 int ret; 228 int ret;
231 229
232 /* 230 /*
233 * Do the basic POSIX ACL permission checks. 231 * Do the basic POSIX ACL permission checks.
234 */ 232 */
235 ret = acl_permission_check(inode, mask, flags, check_acl); 233 ret = acl_permission_check(inode, mask);
236 if (ret != -EACCES) 234 if (ret != -EACCES)
237 return ret; 235 return ret;
238 236
237 if (S_ISDIR(inode->i_mode)) {
238 /* DACs are overridable for directories */
239 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
240 return 0;
241 if (!(mask & MAY_WRITE))
242 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
243 return 0;
244 return -EACCES;
245 }
239 /* 246 /*
240 * Read/write DACs are always overridable. 247 * Read/write DACs are always overridable.
241 * Executable DACs are overridable for all directories and 248 * Executable DACs are overridable when there is
242 * for non-directories that have least one exec bit set. 249 * at least one exec bit set.
243 */ 250 */
244 if (!(mask & MAY_EXEC) || execute_ok(inode)) 251 if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
245 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) 252 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
246 return 0; 253 return 0;
247 254
@@ -249,7 +256,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
249 * Searching includes executable on directories, else just read. 256 * Searching includes executable on directories, else just read.
250 */ 257 */
251 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 258 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
252 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 259 if (mask == MAY_READ)
253 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) 260 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
254 return 0; 261 return 0;
255 262
@@ -288,10 +295,9 @@ int inode_permission(struct inode *inode, int mask)
288 } 295 }
289 296
290 if (inode->i_op->permission) 297 if (inode->i_op->permission)
291 retval = inode->i_op->permission(inode, mask, 0); 298 retval = inode->i_op->permission(inode, mask);
292 else 299 else
293 retval = generic_permission(inode, mask, 0, 300 retval = generic_permission(inode, mask);
294 inode->i_op->check_acl);
295 301
296 if (retval) 302 if (retval)
297 return retval; 303 return retval;
@@ -304,69 +310,6 @@ int inode_permission(struct inode *inode, int mask)
304} 310}
305 311
306/** 312/**
307 * file_permission - check for additional access rights to a given file
308 * @file: file to check access rights for
309 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
310 *
311 * Used to check for read/write/execute permissions on an already opened
312 * file.
313 *
314 * Note:
315 * Do not use this function in new code. All access checks should
316 * be done using inode_permission().
317 */
318int file_permission(struct file *file, int mask)
319{
320 return inode_permission(file->f_path.dentry->d_inode, mask);
321}
322
323/*
324 * get_write_access() gets write permission for a file.
325 * put_write_access() releases this write permission.
326 * This is used for regular files.
327 * We cannot support write (and maybe mmap read-write shared) accesses and
328 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
329 * can have the following values:
330 * 0: no writers, no VM_DENYWRITE mappings
331 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
332 * > 0: (i_writecount) users are writing to the file.
333 *
334 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
335 * except for the cases where we don't hold i_writecount yet. Then we need to
336 * use {get,deny}_write_access() - these functions check the sign and refuse
337 * to do the change if sign is wrong. Exclusion between them is provided by
338 * the inode->i_lock spinlock.
339 */
340
341int get_write_access(struct inode * inode)
342{
343 spin_lock(&inode->i_lock);
344 if (atomic_read(&inode->i_writecount) < 0) {
345 spin_unlock(&inode->i_lock);
346 return -ETXTBSY;
347 }
348 atomic_inc(&inode->i_writecount);
349 spin_unlock(&inode->i_lock);
350
351 return 0;
352}
353
354int deny_write_access(struct file * file)
355{
356 struct inode *inode = file->f_path.dentry->d_inode;
357
358 spin_lock(&inode->i_lock);
359 if (atomic_read(&inode->i_writecount) > 0) {
360 spin_unlock(&inode->i_lock);
361 return -ETXTBSY;
362 }
363 atomic_dec(&inode->i_writecount);
364 spin_unlock(&inode->i_lock);
365
366 return 0;
367}
368
369/**
370 * path_get - get a reference to a path 313 * path_get - get a reference to a path
371 * @path: path to get the reference to 314 * @path: path to get the reference to
372 * 315 *
@@ -492,28 +435,6 @@ static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
492 return dentry->d_op->d_revalidate(dentry, nd); 435 return dentry->d_op->d_revalidate(dentry, nd);
493} 436}
494 437
495static struct dentry *
496do_revalidate(struct dentry *dentry, struct nameidata *nd)
497{
498 int status = d_revalidate(dentry, nd);
499 if (unlikely(status <= 0)) {
500 /*
501 * The dentry failed validation.
502 * If d_revalidate returned 0 attempt to invalidate
503 * the dentry otherwise d_revalidate is asking us
504 * to return a fail status.
505 */
506 if (status < 0) {
507 dput(dentry);
508 dentry = ERR_PTR(status);
509 } else if (!d_invalidate(dentry)) {
510 dput(dentry);
511 dentry = NULL;
512 }
513 }
514 return dentry;
515}
516
517/** 438/**
518 * complete_walk - successful completion of path walk 439 * complete_walk - successful completion of path walk
519 * @nd: pointer nameidata 440 * @nd: pointer nameidata
@@ -568,40 +489,6 @@ static int complete_walk(struct nameidata *nd)
568 return status; 489 return status;
569} 490}
570 491
571/*
572 * Short-cut version of permission(), for calling on directories
573 * during pathname resolution. Combines parts of permission()
574 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
575 *
576 * If appropriate, check DAC only. If not appropriate, or
577 * short-cut DAC fails, then call ->permission() to do more
578 * complete permission check.
579 */
580static inline int exec_permission(struct inode *inode, unsigned int flags)
581{
582 int ret;
583 struct user_namespace *ns = inode_userns(inode);
584
585 if (inode->i_op->permission) {
586 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
587 } else {
588 ret = acl_permission_check(inode, MAY_EXEC, flags,
589 inode->i_op->check_acl);
590 }
591 if (likely(!ret))
592 goto ok;
593 if (ret == -ECHILD)
594 return ret;
595
596 if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
597 ns_capable(ns, CAP_DAC_READ_SEARCH))
598 goto ok;
599
600 return ret;
601ok:
602 return security_inode_exec_permission(inode, flags);
603}
604
605static __always_inline void set_root(struct nameidata *nd) 492static __always_inline void set_root(struct nameidata *nd)
606{ 493{
607 if (!nd->root.mnt) 494 if (!nd->root.mnt)
@@ -776,7 +663,7 @@ static int follow_automount(struct path *path, unsigned flags,
776 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT 663 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
777 * and this is the terminal part of the path. 664 * and this is the terminal part of the path.
778 */ 665 */
779 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE)) 666 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
780 return -EISDIR; /* we actually want to stop here */ 667 return -EISDIR; /* we actually want to stop here */
781 668
782 /* We want to mount if someone is trying to open/create a file of any 669 /* We want to mount if someone is trying to open/create a file of any
@@ -788,7 +675,7 @@ static int follow_automount(struct path *path, unsigned flags,
788 * appended a '/' to the name. 675 * appended a '/' to the name.
789 */ 676 */
790 if (!(flags & LOOKUP_FOLLOW) && 677 if (!(flags & LOOKUP_FOLLOW) &&
791 !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY | 678 !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
792 LOOKUP_OPEN | LOOKUP_CREATE))) 679 LOOKUP_OPEN | LOOKUP_CREATE)))
793 return -EISDIR; 680 return -EISDIR;
794 681
@@ -807,7 +694,7 @@ static int follow_automount(struct path *path, unsigned flags,
807 * the path being looked up; if it wasn't then the remainder of 694 * the path being looked up; if it wasn't then the remainder of
808 * the path is inaccessible and we should say so. 695 * the path is inaccessible and we should say so.
809 */ 696 */
810 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE)) 697 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
811 return -EREMOTE; 698 return -EREMOTE;
812 return PTR_ERR(mnt); 699 return PTR_ERR(mnt);
813 } 700 }
@@ -1134,6 +1021,30 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
1134} 1021}
1135 1022
1136/* 1023/*
1024 * We already have a dentry, but require a lookup to be performed on the parent
1025 * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error.
1026 * parent->d_inode->i_mutex must be held. d_lookup must have verified that no
1027 * child exists while under i_mutex.
1028 */
1029static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry,
1030 struct nameidata *nd)
1031{
1032 struct inode *inode = parent->d_inode;
1033 struct dentry *old;
1034
1035 /* Don't create child dentry for a dead directory. */
1036 if (unlikely(IS_DEADDIR(inode)))
1037 return ERR_PTR(-ENOENT);
1038
1039 old = inode->i_op->lookup(inode, dentry, nd);
1040 if (unlikely(old)) {
1041 dput(dentry);
1042 dentry = old;
1043 }
1044 return dentry;
1045}
1046
1047/*
1137 * It's more convoluted than I'd like it to be, but... it's still fairly 1048 * It's more convoluted than I'd like it to be, but... it's still fairly
1138 * small and for now I'd prefer to have fast path as straight as possible. 1049 * small and for now I'd prefer to have fast path as straight as possible.
1139 * It _is_ time-critical. 1050 * It _is_ time-critical.
@@ -1172,6 +1083,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1172 goto unlazy; 1083 goto unlazy;
1173 } 1084 }
1174 } 1085 }
1086 if (unlikely(d_need_lookup(dentry)))
1087 goto unlazy;
1175 path->mnt = mnt; 1088 path->mnt = mnt;
1176 path->dentry = dentry; 1089 path->dentry = dentry;
1177 if (unlikely(!__follow_mount_rcu(nd, path, inode))) 1090 if (unlikely(!__follow_mount_rcu(nd, path, inode)))
@@ -1186,6 +1099,10 @@ unlazy:
1186 dentry = __d_lookup(parent, name); 1099 dentry = __d_lookup(parent, name);
1187 } 1100 }
1188 1101
1102 if (dentry && unlikely(d_need_lookup(dentry))) {
1103 dput(dentry);
1104 dentry = NULL;
1105 }
1189retry: 1106retry:
1190 if (unlikely(!dentry)) { 1107 if (unlikely(!dentry)) {
1191 struct inode *dir = parent->d_inode; 1108 struct inode *dir = parent->d_inode;
@@ -1202,6 +1119,15 @@ retry:
1202 /* known good */ 1119 /* known good */
1203 need_reval = 0; 1120 need_reval = 0;
1204 status = 1; 1121 status = 1;
1122 } else if (unlikely(d_need_lookup(dentry))) {
1123 dentry = d_inode_lookup(parent, dentry, nd);
1124 if (IS_ERR(dentry)) {
1125 mutex_unlock(&dir->i_mutex);
1126 return PTR_ERR(dentry);
1127 }
1128 /* known good */
1129 need_reval = 0;
1130 status = 1;
1205 } 1131 }
1206 mutex_unlock(&dir->i_mutex); 1132 mutex_unlock(&dir->i_mutex);
1207 } 1133 }
@@ -1234,13 +1160,13 @@ retry:
1234static inline int may_lookup(struct nameidata *nd) 1160static inline int may_lookup(struct nameidata *nd)
1235{ 1161{
1236 if (nd->flags & LOOKUP_RCU) { 1162 if (nd->flags & LOOKUP_RCU) {
1237 int err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1163 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
1238 if (err != -ECHILD) 1164 if (err != -ECHILD)
1239 return err; 1165 return err;
1240 if (unlazy_walk(nd, NULL)) 1166 if (unlazy_walk(nd, NULL))
1241 return -ECHILD; 1167 return -ECHILD;
1242 } 1168 }
1243 return exec_permission(nd->inode, 0); 1169 return inode_permission(nd->inode, MAY_EXEC);
1244} 1170}
1245 1171
1246static inline int handle_dots(struct nameidata *nd, int type) 1172static inline int handle_dots(struct nameidata *nd, int type)
@@ -1354,7 +1280,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1354{ 1280{
1355 struct path next; 1281 struct path next;
1356 int err; 1282 int err;
1357 unsigned int lookup_flags = nd->flags;
1358 1283
1359 while (*name=='/') 1284 while (*name=='/')
1360 name++; 1285 name++;
@@ -1368,8 +1293,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1368 unsigned int c; 1293 unsigned int c;
1369 int type; 1294 int type;
1370 1295
1371 nd->flags |= LOOKUP_CONTINUE;
1372
1373 err = may_lookup(nd); 1296 err = may_lookup(nd);
1374 if (err) 1297 if (err)
1375 break; 1298 break;
@@ -1431,8 +1354,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1431 /* here ends the main loop */ 1354 /* here ends the main loop */
1432 1355
1433last_component: 1356last_component:
1434 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1435 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1436 nd->last = this; 1357 nd->last = this;
1437 nd->last_type = type; 1358 nd->last_type = type;
1438 return 0; 1359 return 0;
@@ -1515,7 +1436,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1515 if (!S_ISDIR(dentry->d_inode->i_mode)) 1436 if (!S_ISDIR(dentry->d_inode->i_mode))
1516 goto fput_fail; 1437 goto fput_fail;
1517 1438
1518 retval = file_permission(file, MAY_EXEC); 1439 retval = inode_permission(dentry->d_inode, MAY_EXEC);
1519 if (retval) 1440 if (retval)
1520 goto fput_fail; 1441 goto fput_fail;
1521 } 1442 }
@@ -1653,16 +1574,22 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
1653 * @mnt: pointer to vfs mount of the base directory 1574 * @mnt: pointer to vfs mount of the base directory
1654 * @name: pointer to file name 1575 * @name: pointer to file name
1655 * @flags: lookup flags 1576 * @flags: lookup flags
1656 * @nd: pointer to nameidata 1577 * @path: pointer to struct path to fill
1657 */ 1578 */
1658int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, 1579int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1659 const char *name, unsigned int flags, 1580 const char *name, unsigned int flags,
1660 struct nameidata *nd) 1581 struct path *path)
1661{ 1582{
1662 nd->root.dentry = dentry; 1583 struct nameidata nd;
1663 nd->root.mnt = mnt; 1584 int err;
1585 nd.root.dentry = dentry;
1586 nd.root.mnt = mnt;
1587 BUG_ON(flags & LOOKUP_PARENT);
1664 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 1588 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1665 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd); 1589 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
1590 if (!err)
1591 *path = nd.path;
1592 return err;
1666} 1593}
1667 1594
1668static struct dentry *__lookup_hash(struct qstr *name, 1595static struct dentry *__lookup_hash(struct qstr *name,
@@ -1672,7 +1599,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1672 struct dentry *dentry; 1599 struct dentry *dentry;
1673 int err; 1600 int err;
1674 1601
1675 err = exec_permission(inode, 0); 1602 err = inode_permission(inode, MAY_EXEC);
1676 if (err) 1603 if (err)
1677 return ERR_PTR(err); 1604 return ERR_PTR(err);
1678 1605
@@ -1683,8 +1610,34 @@ static struct dentry *__lookup_hash(struct qstr *name,
1683 */ 1610 */
1684 dentry = d_lookup(base, name); 1611 dentry = d_lookup(base, name);
1685 1612
1686 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) 1613 if (dentry && d_need_lookup(dentry)) {
1687 dentry = do_revalidate(dentry, nd); 1614 /*
1615 * __lookup_hash is called with the parent dir's i_mutex already
1616 * held, so we are good to go here.
1617 */
1618 dentry = d_inode_lookup(base, dentry, nd);
1619 if (IS_ERR(dentry))
1620 return dentry;
1621 }
1622
1623 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1624 int status = d_revalidate(dentry, nd);
1625 if (unlikely(status <= 0)) {
1626 /*
1627 * The dentry failed validation.
1628 * If d_revalidate returned 0 attempt to invalidate
1629 * the dentry otherwise d_revalidate is asking us
1630 * to return a fail status.
1631 */
1632 if (status < 0) {
1633 dput(dentry);
1634 return ERR_PTR(status);
1635 } else if (!d_invalidate(dentry)) {
1636 dput(dentry);
1637 dentry = NULL;
1638 }
1639 }
1640 }
1688 1641
1689 if (!dentry) 1642 if (!dentry)
1690 dentry = d_alloc_and_lookup(base, name, nd); 1643 dentry = d_alloc_and_lookup(base, name, nd);
@@ -2012,27 +1965,10 @@ static int handle_truncate(struct file *filp)
2012 return error; 1965 return error;
2013} 1966}
2014 1967
2015/*
2016 * Note that while the flag value (low two bits) for sys_open means:
2017 * 00 - read-only
2018 * 01 - write-only
2019 * 10 - read-write
2020 * 11 - special
2021 * it is changed into
2022 * 00 - no permissions needed
2023 * 01 - read-permission
2024 * 10 - write-permission
2025 * 11 - read-write
2026 * for the internal routines (ie open_namei()/follow_link() etc)
2027 * This is more logical, and also allows the 00 "no perm needed"
2028 * to be used for symlinks (where the permissions are checked
2029 * later).
2030 *
2031*/
2032static inline int open_to_namei_flags(int flag) 1968static inline int open_to_namei_flags(int flag)
2033{ 1969{
2034 if ((flag+1) & O_ACCMODE) 1970 if ((flag & O_ACCMODE) == 3)
2035 flag++; 1971 flag--;
2036 return flag; 1972 return flag;
2037} 1973}
2038 1974
@@ -2327,35 +2263,29 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2327 return file; 2263 return file;
2328} 2264}
2329 2265
2330/** 2266struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
2331 * lookup_create - lookup a dentry, creating it if it doesn't exist
2332 * @nd: nameidata info
2333 * @is_dir: directory flag
2334 *
2335 * Simple function to lookup and return a dentry and create it
2336 * if it doesn't exist. Is SMP-safe.
2337 *
2338 * Returns with nd->path.dentry->d_inode->i_mutex locked.
2339 */
2340struct dentry *lookup_create(struct nameidata *nd, int is_dir)
2341{ 2267{
2342 struct dentry *dentry = ERR_PTR(-EEXIST); 2268 struct dentry *dentry = ERR_PTR(-EEXIST);
2269 struct nameidata nd;
2270 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
2271 if (error)
2272 return ERR_PTR(error);
2343 2273
2344 mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2345 /* 2274 /*
2346 * Yucky last component or no last component at all? 2275 * Yucky last component or no last component at all?
2347 * (foo/., foo/.., /////) 2276 * (foo/., foo/.., /////)
2348 */ 2277 */
2349 if (nd->last_type != LAST_NORM) 2278 if (nd.last_type != LAST_NORM)
2350 goto fail; 2279 goto out;
2351 nd->flags &= ~LOOKUP_PARENT; 2280 nd.flags &= ~LOOKUP_PARENT;
2352 nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL; 2281 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
2353 nd->intent.open.flags = O_EXCL; 2282 nd.intent.open.flags = O_EXCL;
2354 2283
2355 /* 2284 /*
2356 * Do the final lookup. 2285 * Do the final lookup.
2357 */ 2286 */
2358 dentry = lookup_hash(nd); 2287 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2288 dentry = lookup_hash(&nd);
2359 if (IS_ERR(dentry)) 2289 if (IS_ERR(dentry))
2360 goto fail; 2290 goto fail;
2361 2291
@@ -2367,18 +2297,35 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
2367 * all is fine. Let's be bastards - you had / on the end, you've 2297 * all is fine. Let's be bastards - you had / on the end, you've
2368 * been asking for (non-existent) directory. -ENOENT for you. 2298 * been asking for (non-existent) directory. -ENOENT for you.
2369 */ 2299 */
2370 if (unlikely(!is_dir && nd->last.name[nd->last.len])) { 2300 if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
2371 dput(dentry); 2301 dput(dentry);
2372 dentry = ERR_PTR(-ENOENT); 2302 dentry = ERR_PTR(-ENOENT);
2303 goto fail;
2373 } 2304 }
2305 *path = nd.path;
2374 return dentry; 2306 return dentry;
2375eexist: 2307eexist:
2376 dput(dentry); 2308 dput(dentry);
2377 dentry = ERR_PTR(-EEXIST); 2309 dentry = ERR_PTR(-EEXIST);
2378fail: 2310fail:
2311 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2312out:
2313 path_put(&nd.path);
2379 return dentry; 2314 return dentry;
2380} 2315}
2381EXPORT_SYMBOL_GPL(lookup_create); 2316EXPORT_SYMBOL(kern_path_create);
2317
2318struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
2319{
2320 char *tmp = getname(pathname);
2321 struct dentry *res;
2322 if (IS_ERR(tmp))
2323 return ERR_CAST(tmp);
2324 res = kern_path_create(dfd, tmp, path, is_dir);
2325 putname(tmp);
2326 return res;
2327}
2328EXPORT_SYMBOL(user_path_create);
2382 2329
2383int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 2330int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2384{ 2331{
@@ -2428,54 +2375,46 @@ static int may_mknod(mode_t mode)
2428SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, 2375SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
2429 unsigned, dev) 2376 unsigned, dev)
2430{ 2377{
2431 int error;
2432 char *tmp;
2433 struct dentry *dentry; 2378 struct dentry *dentry;
2434 struct nameidata nd; 2379 struct path path;
2380 int error;
2435 2381
2436 if (S_ISDIR(mode)) 2382 if (S_ISDIR(mode))
2437 return -EPERM; 2383 return -EPERM;
2438 2384
2439 error = user_path_parent(dfd, filename, &nd, &tmp); 2385 dentry = user_path_create(dfd, filename, &path, 0);
2440 if (error) 2386 if (IS_ERR(dentry))
2441 return error; 2387 return PTR_ERR(dentry);
2442 2388
2443 dentry = lookup_create(&nd, 0); 2389 if (!IS_POSIXACL(path.dentry->d_inode))
2444 if (IS_ERR(dentry)) {
2445 error = PTR_ERR(dentry);
2446 goto out_unlock;
2447 }
2448 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2449 mode &= ~current_umask(); 2390 mode &= ~current_umask();
2450 error = may_mknod(mode); 2391 error = may_mknod(mode);
2451 if (error) 2392 if (error)
2452 goto out_dput; 2393 goto out_dput;
2453 error = mnt_want_write(nd.path.mnt); 2394 error = mnt_want_write(path.mnt);
2454 if (error) 2395 if (error)
2455 goto out_dput; 2396 goto out_dput;
2456 error = security_path_mknod(&nd.path, dentry, mode, dev); 2397 error = security_path_mknod(&path, dentry, mode, dev);
2457 if (error) 2398 if (error)
2458 goto out_drop_write; 2399 goto out_drop_write;
2459 switch (mode & S_IFMT) { 2400 switch (mode & S_IFMT) {
2460 case 0: case S_IFREG: 2401 case 0: case S_IFREG:
2461 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2402 error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
2462 break; 2403 break;
2463 case S_IFCHR: case S_IFBLK: 2404 case S_IFCHR: case S_IFBLK:
2464 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode, 2405 error = vfs_mknod(path.dentry->d_inode,dentry,mode,
2465 new_decode_dev(dev)); 2406 new_decode_dev(dev));
2466 break; 2407 break;
2467 case S_IFIFO: case S_IFSOCK: 2408 case S_IFIFO: case S_IFSOCK:
2468 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2409 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
2469 break; 2410 break;
2470 } 2411 }
2471out_drop_write: 2412out_drop_write:
2472 mnt_drop_write(nd.path.mnt); 2413 mnt_drop_write(path.mnt);
2473out_dput: 2414out_dput:
2474 dput(dentry); 2415 dput(dentry);
2475out_unlock: 2416 mutex_unlock(&path.dentry->d_inode->i_mutex);
2476 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2417 path_put(&path);
2477 path_put(&nd.path);
2478 putname(tmp);
2479 2418
2480 return error; 2419 return error;
2481} 2420}
@@ -2508,38 +2447,29 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2508 2447
2509SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) 2448SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
2510{ 2449{
2511 int error = 0;
2512 char * tmp;
2513 struct dentry *dentry; 2450 struct dentry *dentry;
2514 struct nameidata nd; 2451 struct path path;
2515 2452 int error;
2516 error = user_path_parent(dfd, pathname, &nd, &tmp);
2517 if (error)
2518 goto out_err;
2519 2453
2520 dentry = lookup_create(&nd, 1); 2454 dentry = user_path_create(dfd, pathname, &path, 1);
2521 error = PTR_ERR(dentry);
2522 if (IS_ERR(dentry)) 2455 if (IS_ERR(dentry))
2523 goto out_unlock; 2456 return PTR_ERR(dentry);
2524 2457
2525 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2458 if (!IS_POSIXACL(path.dentry->d_inode))
2526 mode &= ~current_umask(); 2459 mode &= ~current_umask();
2527 error = mnt_want_write(nd.path.mnt); 2460 error = mnt_want_write(path.mnt);
2528 if (error) 2461 if (error)
2529 goto out_dput; 2462 goto out_dput;
2530 error = security_path_mkdir(&nd.path, dentry, mode); 2463 error = security_path_mkdir(&path, dentry, mode);
2531 if (error) 2464 if (error)
2532 goto out_drop_write; 2465 goto out_drop_write;
2533 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2466 error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
2534out_drop_write: 2467out_drop_write:
2535 mnt_drop_write(nd.path.mnt); 2468 mnt_drop_write(path.mnt);
2536out_dput: 2469out_dput:
2537 dput(dentry); 2470 dput(dentry);
2538out_unlock: 2471 mutex_unlock(&path.dentry->d_inode->i_mutex);
2539 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2472 path_put(&path);
2540 path_put(&nd.path);
2541 putname(tmp);
2542out_err:
2543 return error; 2473 return error;
2544} 2474}
2545 2475
@@ -2799,38 +2729,31 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
2799{ 2729{
2800 int error; 2730 int error;
2801 char *from; 2731 char *from;
2802 char *to;
2803 struct dentry *dentry; 2732 struct dentry *dentry;
2804 struct nameidata nd; 2733 struct path path;
2805 2734
2806 from = getname(oldname); 2735 from = getname(oldname);
2807 if (IS_ERR(from)) 2736 if (IS_ERR(from))
2808 return PTR_ERR(from); 2737 return PTR_ERR(from);
2809 2738
2810 error = user_path_parent(newdfd, newname, &nd, &to); 2739 dentry = user_path_create(newdfd, newname, &path, 0);
2811 if (error)
2812 goto out_putname;
2813
2814 dentry = lookup_create(&nd, 0);
2815 error = PTR_ERR(dentry); 2740 error = PTR_ERR(dentry);
2816 if (IS_ERR(dentry)) 2741 if (IS_ERR(dentry))
2817 goto out_unlock; 2742 goto out_putname;
2818 2743
2819 error = mnt_want_write(nd.path.mnt); 2744 error = mnt_want_write(path.mnt);
2820 if (error) 2745 if (error)
2821 goto out_dput; 2746 goto out_dput;
2822 error = security_path_symlink(&nd.path, dentry, from); 2747 error = security_path_symlink(&path, dentry, from);
2823 if (error) 2748 if (error)
2824 goto out_drop_write; 2749 goto out_drop_write;
2825 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); 2750 error = vfs_symlink(path.dentry->d_inode, dentry, from);
2826out_drop_write: 2751out_drop_write:
2827 mnt_drop_write(nd.path.mnt); 2752 mnt_drop_write(path.mnt);
2828out_dput: 2753out_dput:
2829 dput(dentry); 2754 dput(dentry);
2830out_unlock: 2755 mutex_unlock(&path.dentry->d_inode->i_mutex);
2831 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2756 path_put(&path);
2832 path_put(&nd.path);
2833 putname(to);
2834out_putname: 2757out_putname:
2835 putname(from); 2758 putname(from);
2836 return error; 2759 return error;
@@ -2895,11 +2818,9 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
2895 int, newdfd, const char __user *, newname, int, flags) 2818 int, newdfd, const char __user *, newname, int, flags)
2896{ 2819{
2897 struct dentry *new_dentry; 2820 struct dentry *new_dentry;
2898 struct nameidata nd; 2821 struct path old_path, new_path;
2899 struct path old_path;
2900 int how = 0; 2822 int how = 0;
2901 int error; 2823 int error;
2902 char *to;
2903 2824
2904 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) 2825 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
2905 return -EINVAL; 2826 return -EINVAL;
@@ -2921,32 +2842,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
2921 if (error) 2842 if (error)
2922 return error; 2843 return error;
2923 2844
2924 error = user_path_parent(newdfd, newname, &nd, &to); 2845 new_dentry = user_path_create(newdfd, newname, &new_path, 0);
2925 if (error)
2926 goto out;
2927 error = -EXDEV;
2928 if (old_path.mnt != nd.path.mnt)
2929 goto out_release;
2930 new_dentry = lookup_create(&nd, 0);
2931 error = PTR_ERR(new_dentry); 2846 error = PTR_ERR(new_dentry);
2932 if (IS_ERR(new_dentry)) 2847 if (IS_ERR(new_dentry))
2933 goto out_unlock; 2848 goto out;
2934 error = mnt_want_write(nd.path.mnt); 2849
2850 error = -EXDEV;
2851 if (old_path.mnt != new_path.mnt)
2852 goto out_dput;
2853 error = mnt_want_write(new_path.mnt);
2935 if (error) 2854 if (error)
2936 goto out_dput; 2855 goto out_dput;
2937 error = security_path_link(old_path.dentry, &nd.path, new_dentry); 2856 error = security_path_link(old_path.dentry, &new_path, new_dentry);
2938 if (error) 2857 if (error)
2939 goto out_drop_write; 2858 goto out_drop_write;
2940 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); 2859 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
2941out_drop_write: 2860out_drop_write:
2942 mnt_drop_write(nd.path.mnt); 2861 mnt_drop_write(new_path.mnt);
2943out_dput: 2862out_dput:
2944 dput(new_dentry); 2863 dput(new_dentry);
2945out_unlock: 2864 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
2946 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2865 path_put(&new_path);
2947out_release:
2948 path_put(&nd.path);
2949 putname(to);
2950out: 2866out:
2951 path_put(&old_path); 2867 path_put(&old_path);
2952 2868
@@ -3352,11 +3268,9 @@ EXPORT_SYMBOL(page_readlink);
3352EXPORT_SYMBOL(__page_symlink); 3268EXPORT_SYMBOL(__page_symlink);
3353EXPORT_SYMBOL(page_symlink); 3269EXPORT_SYMBOL(page_symlink);
3354EXPORT_SYMBOL(page_symlink_inode_operations); 3270EXPORT_SYMBOL(page_symlink_inode_operations);
3355EXPORT_SYMBOL(kern_path_parent);
3356EXPORT_SYMBOL(kern_path); 3271EXPORT_SYMBOL(kern_path);
3357EXPORT_SYMBOL(vfs_path_lookup); 3272EXPORT_SYMBOL(vfs_path_lookup);
3358EXPORT_SYMBOL(inode_permission); 3273EXPORT_SYMBOL(inode_permission);
3359EXPORT_SYMBOL(file_permission);
3360EXPORT_SYMBOL(unlock_rename); 3274EXPORT_SYMBOL(unlock_rename);
3361EXPORT_SYMBOL(vfs_create); 3275EXPORT_SYMBOL(vfs_create);
3362EXPORT_SYMBOL(vfs_follow_link); 3276EXPORT_SYMBOL(vfs_follow_link);
diff --git a/fs/namespace.c b/fs/namespace.c
index fe59bd145d21..cda50fe9250a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -934,8 +934,8 @@ int mnt_had_events(struct proc_mounts *p)
934 int res = 0; 934 int res = 0;
935 935
936 br_read_lock(vfsmount_lock); 936 br_read_lock(vfsmount_lock);
937 if (p->event != ns->event) { 937 if (p->m.poll_event != ns->event) {
938 p->event = ns->event; 938 p->m.poll_event = ns->event;
939 res = 1; 939 res = 1;
940 } 940 }
941 br_read_unlock(vfsmount_lock); 941 br_read_unlock(vfsmount_lock);
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 0ed65e0c3dfe..64a326418aa2 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -20,9 +20,9 @@
20 20
21#include "ncp_fs.h" 21#include "ncp_fs.h"
22 22
23static int ncp_fsync(struct file *file, int datasync) 23static int ncp_fsync(struct file *file, loff_t start, loff_t end, int datasync)
24{ 24{
25 return 0; 25 return filemap_write_and_wait_range(file->f_mapping, start, end);
26} 26}
27 27
28/* 28/*
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index 84690319e625..c98b439332fc 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -113,19 +113,18 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
113 113
114int nfs_cache_register(struct cache_detail *cd) 114int nfs_cache_register(struct cache_detail *cd)
115{ 115{
116 struct nameidata nd;
117 struct vfsmount *mnt; 116 struct vfsmount *mnt;
117 struct path path;
118 int ret; 118 int ret;
119 119
120 mnt = rpc_get_mount(); 120 mnt = rpc_get_mount();
121 if (IS_ERR(mnt)) 121 if (IS_ERR(mnt))
122 return PTR_ERR(mnt); 122 return PTR_ERR(mnt);
123 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd); 123 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path);
124 if (ret) 124 if (ret)
125 goto err; 125 goto err;
126 ret = sunrpc_cache_register_pipefs(nd.path.dentry, 126 ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd);
127 cd->name, 0600, cd); 127 path_put(&path);
128 path_put(&nd.path);
129 if (!ret) 128 if (!ret)
130 return ret; 129 return ret;
131err: 130err:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ededdbd0db38..57f578e2560a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -56,7 +56,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *);
56static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); 56static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
57static int nfs_rename(struct inode *, struct dentry *, 57static int nfs_rename(struct inode *, struct dentry *,
58 struct inode *, struct dentry *); 58 struct inode *, struct dentry *);
59static int nfs_fsync_dir(struct file *, int); 59static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
60static loff_t nfs_llseek_dir(struct file *, loff_t, int); 60static loff_t nfs_llseek_dir(struct file *, loff_t, int);
61static void nfs_readdir_clear_array(struct page*); 61static void nfs_readdir_clear_array(struct page*);
62 62
@@ -945,15 +945,19 @@ out:
945 * All directory operations under NFS are synchronous, so fsync() 945 * All directory operations under NFS are synchronous, so fsync()
946 * is a dummy operation. 946 * is a dummy operation.
947 */ 947 */
948static int nfs_fsync_dir(struct file *filp, int datasync) 948static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
949 int datasync)
949{ 950{
950 struct dentry *dentry = filp->f_path.dentry; 951 struct dentry *dentry = filp->f_path.dentry;
952 struct inode *inode = dentry->d_inode;
951 953
952 dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", 954 dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
953 dentry->d_parent->d_name.name, dentry->d_name.name, 955 dentry->d_parent->d_name.name, dentry->d_name.name,
954 datasync); 956 datasync);
955 957
958 mutex_lock(&inode->i_mutex);
956 nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); 959 nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
960 mutex_unlock(&inode->i_mutex);
957 return 0; 961 return 0;
958} 962}
959 963
@@ -997,14 +1001,12 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
997 * Return the intent data that applies to this particular path component 1001 * Return the intent data that applies to this particular path component
998 * 1002 *
999 * Note that the current set of intents only apply to the very last 1003 * Note that the current set of intents only apply to the very last
1000 * component of the path. 1004 * component of the path and none of them is set before that last
1001 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. 1005 * component.
1002 */ 1006 */
1003static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, 1007static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
1004 unsigned int mask) 1008 unsigned int mask)
1005{ 1009{
1006 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
1007 return 0;
1008 return nd->flags & mask; 1010 return nd->flags & mask;
1009} 1011}
1010 1012
@@ -1338,25 +1340,31 @@ static int is_atomic_open(struct nameidata *nd)
1338 return 0; 1340 return 0;
1339 /* Are we trying to write to a read only partition? */ 1341 /* Are we trying to write to a read only partition? */
1340 if (__mnt_is_readonly(nd->path.mnt) && 1342 if (__mnt_is_readonly(nd->path.mnt) &&
1341 (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) 1343 (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE)))
1342 return 0; 1344 return 0;
1343 return 1; 1345 return 1;
1344} 1346}
1345 1347
1346static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) 1348static fmode_t flags_to_mode(int flags)
1349{
1350 fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
1351 if ((flags & O_ACCMODE) != O_WRONLY)
1352 res |= FMODE_READ;
1353 if ((flags & O_ACCMODE) != O_RDONLY)
1354 res |= FMODE_WRITE;
1355 return res;
1356}
1357
1358static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
1347{ 1359{
1348 struct path path = {
1349 .mnt = nd->path.mnt,
1350 .dentry = dentry,
1351 };
1352 struct nfs_open_context *ctx; 1360 struct nfs_open_context *ctx;
1353 struct rpc_cred *cred; 1361 struct rpc_cred *cred;
1354 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); 1362 fmode_t fmode = flags_to_mode(open_flags);
1355 1363
1356 cred = rpc_lookup_cred(); 1364 cred = rpc_lookup_cred();
1357 if (IS_ERR(cred)) 1365 if (IS_ERR(cred))
1358 return ERR_CAST(cred); 1366 return ERR_CAST(cred);
1359 ctx = alloc_nfs_open_context(&path, cred, fmode); 1367 ctx = alloc_nfs_open_context(dentry, cred, fmode);
1360 put_rpccred(cred); 1368 put_rpccred(cred);
1361 if (ctx == NULL) 1369 if (ctx == NULL)
1362 return ERR_PTR(-ENOMEM); 1370 return ERR_PTR(-ENOMEM);
@@ -1376,13 +1384,13 @@ static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ct
1376 1384
1377 /* If the open_intent is for execute, we have an extra check to make */ 1385 /* If the open_intent is for execute, we have an extra check to make */
1378 if (ctx->mode & FMODE_EXEC) { 1386 if (ctx->mode & FMODE_EXEC) {
1379 ret = nfs_may_open(ctx->path.dentry->d_inode, 1387 ret = nfs_may_open(ctx->dentry->d_inode,
1380 ctx->cred, 1388 ctx->cred,
1381 nd->intent.open.flags); 1389 nd->intent.open.flags);
1382 if (ret < 0) 1390 if (ret < 0)
1383 goto out; 1391 goto out;
1384 } 1392 }
1385 filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); 1393 filp = lookup_instantiate_filp(nd, ctx->dentry, do_open);
1386 if (IS_ERR(filp)) 1394 if (IS_ERR(filp))
1387 ret = PTR_ERR(filp); 1395 ret = PTR_ERR(filp);
1388 else 1396 else
@@ -1420,12 +1428,13 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1420 goto out; 1428 goto out;
1421 } 1429 }
1422 1430
1423 ctx = nameidata_to_nfs_open_context(dentry, nd); 1431 open_flags = nd->intent.open.flags;
1432
1433 ctx = create_nfs_open_context(dentry, open_flags);
1424 res = ERR_CAST(ctx); 1434 res = ERR_CAST(ctx);
1425 if (IS_ERR(ctx)) 1435 if (IS_ERR(ctx))
1426 goto out; 1436 goto out;
1427 1437
1428 open_flags = nd->intent.open.flags;
1429 if (nd->flags & LOOKUP_CREATE) { 1438 if (nd->flags & LOOKUP_CREATE) {
1430 attr.ia_mode = nd->intent.open.create_mode; 1439 attr.ia_mode = nd->intent.open.create_mode;
1431 attr.ia_valid = ATTR_MODE; 1440 attr.ia_valid = ATTR_MODE;
@@ -1463,8 +1472,8 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1463 res = d_add_unique(dentry, inode); 1472 res = d_add_unique(dentry, inode);
1464 nfs_unblock_sillyrename(dentry->d_parent); 1473 nfs_unblock_sillyrename(dentry->d_parent);
1465 if (res != NULL) { 1474 if (res != NULL) {
1466 dput(ctx->path.dentry); 1475 dput(ctx->dentry);
1467 ctx->path.dentry = dget(res); 1476 ctx->dentry = dget(res);
1468 dentry = res; 1477 dentry = res;
1469 } 1478 }
1470 err = nfs_intent_set_file(nd, ctx); 1479 err = nfs_intent_set_file(nd, ctx);
@@ -1517,7 +1526,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1517 /* We can't create new files, or truncate existing ones here */ 1526 /* We can't create new files, or truncate existing ones here */
1518 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); 1527 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
1519 1528
1520 ctx = nameidata_to_nfs_open_context(dentry, nd); 1529 ctx = create_nfs_open_context(dentry, openflags);
1521 ret = PTR_ERR(ctx); 1530 ret = PTR_ERR(ctx);
1522 if (IS_ERR(ctx)) 1531 if (IS_ERR(ctx))
1523 goto out; 1532 goto out;
@@ -1570,7 +1579,7 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1570 struct nfs_open_context *ctx = NULL; 1579 struct nfs_open_context *ctx = NULL;
1571 struct iattr attr; 1580 struct iattr attr;
1572 int error; 1581 int error;
1573 int open_flags = 0; 1582 int open_flags = O_CREAT|O_EXCL;
1574 1583
1575 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1584 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1576 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1585 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1578,27 +1587,27 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1578 attr.ia_mode = mode; 1587 attr.ia_mode = mode;
1579 attr.ia_valid = ATTR_MODE; 1588 attr.ia_valid = ATTR_MODE;
1580 1589
1581 if ((nd->flags & LOOKUP_CREATE) != 0) { 1590 if (nd)
1582 open_flags = nd->intent.open.flags; 1591 open_flags = nd->intent.open.flags;
1583 1592
1584 ctx = nameidata_to_nfs_open_context(dentry, nd); 1593 ctx = create_nfs_open_context(dentry, open_flags);
1585 error = PTR_ERR(ctx); 1594 error = PTR_ERR(ctx);
1586 if (IS_ERR(ctx)) 1595 if (IS_ERR(ctx))
1587 goto out_err_drop; 1596 goto out_err_drop;
1588 }
1589 1597
1590 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); 1598 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
1591 if (error != 0) 1599 if (error != 0)
1592 goto out_put_ctx; 1600 goto out_put_ctx;
1593 if (ctx != NULL) { 1601 if (nd) {
1594 error = nfs_intent_set_file(nd, ctx); 1602 error = nfs_intent_set_file(nd, ctx);
1595 if (error < 0) 1603 if (error < 0)
1596 goto out_err; 1604 goto out_err;
1605 } else {
1606 put_nfs_open_context(ctx);
1597 } 1607 }
1598 return 0; 1608 return 0;
1599out_put_ctx: 1609out_put_ctx:
1600 if (ctx != NULL) 1610 put_nfs_open_context(ctx);
1601 put_nfs_open_context(ctx);
1602out_err_drop: 1611out_err_drop:
1603 d_drop(dentry); 1612 d_drop(dentry);
1604out_err: 1613out_err:
@@ -1660,7 +1669,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1660{ 1669{
1661 struct iattr attr; 1670 struct iattr attr;
1662 int error; 1671 int error;
1663 int open_flags = 0; 1672 int open_flags = O_CREAT|O_EXCL;
1664 1673
1665 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1674 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1666 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1675 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1668,7 +1677,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1668 attr.ia_mode = mode; 1677 attr.ia_mode = mode;
1669 attr.ia_valid = ATTR_MODE; 1678 attr.ia_valid = ATTR_MODE;
1670 1679
1671 if ((nd->flags & LOOKUP_CREATE) != 0) 1680 if (nd)
1672 open_flags = nd->intent.open.flags; 1681 open_flags = nd->intent.open.flags;
1673 1682
1674 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); 1683 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL);
@@ -2259,11 +2268,11 @@ static int nfs_open_permission_mask(int openflags)
2259{ 2268{
2260 int mask = 0; 2269 int mask = 0;
2261 2270
2262 if (openflags & FMODE_READ) 2271 if ((openflags & O_ACCMODE) != O_WRONLY)
2263 mask |= MAY_READ; 2272 mask |= MAY_READ;
2264 if (openflags & FMODE_WRITE) 2273 if ((openflags & O_ACCMODE) != O_RDONLY)
2265 mask |= MAY_WRITE; 2274 mask |= MAY_WRITE;
2266 if (openflags & FMODE_EXEC) 2275 if (openflags & __FMODE_EXEC)
2267 mask |= MAY_EXEC; 2276 mask |= MAY_EXEC;
2268 return mask; 2277 return mask;
2269} 2278}
@@ -2273,12 +2282,12 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
2273 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 2282 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
2274} 2283}
2275 2284
2276int nfs_permission(struct inode *inode, int mask, unsigned int flags) 2285int nfs_permission(struct inode *inode, int mask)
2277{ 2286{
2278 struct rpc_cred *cred; 2287 struct rpc_cred *cred;
2279 int res = 0; 2288 int res = 0;
2280 2289
2281 if (flags & IPERM_FLAG_RCU) 2290 if (mask & MAY_NOT_BLOCK)
2282 return -ECHILD; 2291 return -ECHILD;
2283 2292
2284 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2293 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
@@ -2328,7 +2337,7 @@ out:
2328out_notsup: 2337out_notsup:
2329 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2338 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2330 if (res == 0) 2339 if (res == 0)
2331 res = generic_permission(inode, mask, flags, NULL); 2340 res = generic_permission(inode, mask);
2332 goto out; 2341 goto out;
2333} 2342}
2334 2343
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8eea25366717..b35d25b98da6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -284,7 +284,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
284 loff_t pos) 284 loff_t pos)
285{ 285{
286 struct nfs_open_context *ctx = dreq->ctx; 286 struct nfs_open_context *ctx = dreq->ctx;
287 struct inode *inode = ctx->path.dentry->d_inode; 287 struct inode *inode = ctx->dentry->d_inode;
288 unsigned long user_addr = (unsigned long)iov->iov_base; 288 unsigned long user_addr = (unsigned long)iov->iov_base;
289 size_t count = iov->iov_len; 289 size_t count = iov->iov_len;
290 size_t rsize = NFS_SERVER(inode)->rsize; 290 size_t rsize = NFS_SERVER(inode)->rsize;
@@ -715,7 +715,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
715 loff_t pos, int sync) 715 loff_t pos, int sync)
716{ 716{
717 struct nfs_open_context *ctx = dreq->ctx; 717 struct nfs_open_context *ctx = dreq->ctx;
718 struct inode *inode = ctx->path.dentry->d_inode; 718 struct inode *inode = ctx->dentry->d_inode;
719 unsigned long user_addr = (unsigned long)iov->iov_base; 719 unsigned long user_addr = (unsigned long)iov->iov_base;
720 size_t count = iov->iov_len; 720 size_t count = iov->iov_len;
721 struct rpc_task *task; 721 struct rpc_task *task;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2f093ed16980..28b8c3f3cda3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -55,7 +55,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
55static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, 55static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
56 unsigned long nr_segs, loff_t pos); 56 unsigned long nr_segs, loff_t pos);
57static int nfs_file_flush(struct file *, fl_owner_t id); 57static int nfs_file_flush(struct file *, fl_owner_t id);
58static int nfs_file_fsync(struct file *, int datasync); 58static int nfs_file_fsync(struct file *, loff_t, loff_t, int datasync);
59static int nfs_check_flags(int flags); 59static int nfs_check_flags(int flags);
60static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); 60static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
61static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); 61static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
@@ -187,8 +187,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
187 filp->f_path.dentry->d_name.name, 187 filp->f_path.dentry->d_name.name,
188 offset, origin); 188 offset, origin);
189 189
190 /* origin == SEEK_END => we must revalidate the cached file length */ 190 /*
191 if (origin == SEEK_END) { 191 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
192 * the cached file length
193 */
194 if (origin != SEEK_SET || origin != SEEK_CUR) {
192 struct inode *inode = filp->f_mapping->host; 195 struct inode *inode = filp->f_mapping->host;
193 196
194 int retval = nfs_revalidate_file_size(inode, filp); 197 int retval = nfs_revalidate_file_size(inode, filp);
@@ -305,7 +308,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
305 * fall back to doing a synchronous write. 308 * fall back to doing a synchronous write.
306 */ 309 */
307static int 310static int
308nfs_file_fsync(struct file *file, int datasync) 311nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
309{ 312{
310 struct dentry *dentry = file->f_path.dentry; 313 struct dentry *dentry = file->f_path.dentry;
311 struct nfs_open_context *ctx = nfs_file_open_context(file); 314 struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -313,11 +316,15 @@ nfs_file_fsync(struct file *file, int datasync)
313 int have_error, status; 316 int have_error, status;
314 int ret = 0; 317 int ret = 0;
315 318
316
317 dprintk("NFS: fsync file(%s/%s) datasync %d\n", 319 dprintk("NFS: fsync file(%s/%s) datasync %d\n",
318 dentry->d_parent->d_name.name, dentry->d_name.name, 320 dentry->d_parent->d_name.name, dentry->d_name.name,
319 datasync); 321 datasync);
320 322
323 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
324 if (ret)
325 return ret;
326 mutex_lock(&inode->i_mutex);
327
321 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 328 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
322 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 329 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
323 status = nfs_commit_inode(inode, FLUSH_SYNC); 330 status = nfs_commit_inode(inode, FLUSH_SYNC);
@@ -329,6 +336,7 @@ nfs_file_fsync(struct file *file, int datasync)
329 if (!ret && !datasync) 336 if (!ret && !datasync)
330 /* application has asked for meta-data sync */ 337 /* application has asked for meta-data sync */
331 ret = pnfs_layoutcommit_inode(inode, true); 338 ret = pnfs_layoutcommit_inode(inode, true);
339 mutex_unlock(&inode->i_mutex);
332 return ret; 340 return ret;
333} 341}
334 342
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6f4850deb272..fe1203797b2b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -567,7 +567,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
567struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) 567struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
568{ 568{
569 struct nfs_lock_context *res, *new = NULL; 569 struct nfs_lock_context *res, *new = NULL;
570 struct inode *inode = ctx->path.dentry->d_inode; 570 struct inode *inode = ctx->dentry->d_inode;
571 571
572 spin_lock(&inode->i_lock); 572 spin_lock(&inode->i_lock);
573 res = __nfs_find_lock_context(ctx); 573 res = __nfs_find_lock_context(ctx);
@@ -594,7 +594,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
594void nfs_put_lock_context(struct nfs_lock_context *l_ctx) 594void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
595{ 595{
596 struct nfs_open_context *ctx = l_ctx->open_context; 596 struct nfs_open_context *ctx = l_ctx->open_context;
597 struct inode *inode = ctx->path.dentry->d_inode; 597 struct inode *inode = ctx->dentry->d_inode;
598 598
599 if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) 599 if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
600 return; 600 return;
@@ -620,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
620 return; 620 return;
621 if (!is_sync) 621 if (!is_sync)
622 return; 622 return;
623 inode = ctx->path.dentry->d_inode; 623 inode = ctx->dentry->d_inode;
624 if (!list_empty(&NFS_I(inode)->open_files)) 624 if (!list_empty(&NFS_I(inode)->open_files))
625 return; 625 return;
626 server = NFS_SERVER(inode); 626 server = NFS_SERVER(inode);
@@ -629,14 +629,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
629 nfs_revalidate_inode(server, inode); 629 nfs_revalidate_inode(server, inode);
630} 630}
631 631
632struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode) 632struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode)
633{ 633{
634 struct nfs_open_context *ctx; 634 struct nfs_open_context *ctx;
635 635
636 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 636 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
637 if (ctx != NULL) { 637 if (ctx != NULL) {
638 ctx->path = *path; 638 nfs_sb_active(dentry->d_sb);
639 path_get(&ctx->path); 639 ctx->dentry = dget(dentry);
640 ctx->cred = get_rpccred(cred); 640 ctx->cred = get_rpccred(cred);
641 ctx->state = NULL; 641 ctx->state = NULL;
642 ctx->mode = f_mode; 642 ctx->mode = f_mode;
@@ -658,7 +658,8 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
658 658
659static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) 659static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
660{ 660{
661 struct inode *inode = ctx->path.dentry->d_inode; 661 struct inode *inode = ctx->dentry->d_inode;
662 struct super_block *sb = ctx->dentry->d_sb;
662 663
663 if (!list_empty(&ctx->list)) { 664 if (!list_empty(&ctx->list)) {
664 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) 665 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
@@ -671,7 +672,8 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
671 NFS_PROTO(inode)->close_context(ctx, is_sync); 672 NFS_PROTO(inode)->close_context(ctx, is_sync);
672 if (ctx->cred != NULL) 673 if (ctx->cred != NULL)
673 put_rpccred(ctx->cred); 674 put_rpccred(ctx->cred);
674 path_put(&ctx->path); 675 dput(ctx->dentry);
676 nfs_sb_deactive(sb);
675 kfree(ctx); 677 kfree(ctx);
676} 678}
677 679
@@ -741,7 +743,7 @@ int nfs_open(struct inode *inode, struct file *filp)
741 cred = rpc_lookup_cred(); 743 cred = rpc_lookup_cred();
742 if (IS_ERR(cred)) 744 if (IS_ERR(cred))
743 return PTR_ERR(cred); 745 return PTR_ERR(cred);
744 ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode); 746 ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
745 put_rpccred(cred); 747 put_rpccred(cred);
746 if (ctx == NULL) 748 if (ctx == NULL)
747 return -ENOMEM; 749 return -ENOMEM;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c4a69833dd0d..b788f2eb1ba0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -238,7 +238,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
238extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 238extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
239extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 239extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
240extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 240extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
241extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); 241extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
242extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 242extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
243extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 243extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
244 struct nfs4_fs_locations *fs_locations, struct page *page); 244 struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -341,8 +341,8 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc
341extern void nfs4_put_state_owner(struct nfs4_state_owner *); 341extern void nfs4_put_state_owner(struct nfs4_state_owner *);
342extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 342extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
343extern void nfs4_put_open_state(struct nfs4_state *); 343extern void nfs4_put_open_state(struct nfs4_state *);
344extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 344extern void nfs4_close_state(struct nfs4_state *, fmode_t);
345extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 345extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
346extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 346extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
347extern void nfs4_schedule_lease_recovery(struct nfs_client *); 347extern void nfs4_schedule_lease_recovery(struct nfs_client *);
348extern void nfs4_schedule_state_manager(struct nfs_client *); 348extern void nfs4_schedule_state_manager(struct nfs_client *);
@@ -373,8 +373,8 @@ extern struct svc_version nfs4_callback_version4;
373 373
374#else 374#else
375 375
376#define nfs4_close_state(a, b, c) do { } while (0) 376#define nfs4_close_state(a, b) do { } while (0)
377#define nfs4_close_sync(a, b, c) do { } while (0) 377#define nfs4_close_sync(a, b) do { } while (0)
378 378
379#endif /* CONFIG_NFS_V4 */ 379#endif /* CONFIG_NFS_V4 */
380#endif /* __LINUX_FS_NFS_NFS4_FS.H */ 380#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5879b23e0c99..26bece8f3083 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -763,8 +763,8 @@ struct nfs4_opendata {
763 struct nfs_open_confirmres c_res; 763 struct nfs_open_confirmres c_res;
764 struct nfs_fattr f_attr; 764 struct nfs_fattr f_attr;
765 struct nfs_fattr dir_attr; 765 struct nfs_fattr dir_attr;
766 struct path path;
767 struct dentry *dir; 766 struct dentry *dir;
767 struct dentry *dentry;
768 struct nfs4_state_owner *owner; 768 struct nfs4_state_owner *owner;
769 struct nfs4_state *state; 769 struct nfs4_state *state;
770 struct iattr attrs; 770 struct iattr attrs;
@@ -786,12 +786,12 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
786 nfs_fattr_init(&p->dir_attr); 786 nfs_fattr_init(&p->dir_attr);
787} 787}
788 788
789static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 789static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
790 struct nfs4_state_owner *sp, fmode_t fmode, int flags, 790 struct nfs4_state_owner *sp, fmode_t fmode, int flags,
791 const struct iattr *attrs, 791 const struct iattr *attrs,
792 gfp_t gfp_mask) 792 gfp_t gfp_mask)
793{ 793{
794 struct dentry *parent = dget_parent(path->dentry); 794 struct dentry *parent = dget_parent(dentry);
795 struct inode *dir = parent->d_inode; 795 struct inode *dir = parent->d_inode;
796 struct nfs_server *server = NFS_SERVER(dir); 796 struct nfs_server *server = NFS_SERVER(dir);
797 struct nfs4_opendata *p; 797 struct nfs4_opendata *p;
@@ -802,8 +802,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
802 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); 802 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
803 if (p->o_arg.seqid == NULL) 803 if (p->o_arg.seqid == NULL)
804 goto err_free; 804 goto err_free;
805 path_get(path); 805 nfs_sb_active(dentry->d_sb);
806 p->path = *path; 806 p->dentry = dget(dentry);
807 p->dir = parent; 807 p->dir = parent;
808 p->owner = sp; 808 p->owner = sp;
809 atomic_inc(&sp->so_count); 809 atomic_inc(&sp->so_count);
@@ -812,7 +812,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
812 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); 812 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
813 p->o_arg.clientid = server->nfs_client->cl_clientid; 813 p->o_arg.clientid = server->nfs_client->cl_clientid;
814 p->o_arg.id = sp->so_owner_id.id; 814 p->o_arg.id = sp->so_owner_id.id;
815 p->o_arg.name = &p->path.dentry->d_name; 815 p->o_arg.name = &dentry->d_name;
816 p->o_arg.server = server; 816 p->o_arg.server = server;
817 p->o_arg.bitmask = server->attr_bitmask; 817 p->o_arg.bitmask = server->attr_bitmask;
818 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 818 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
@@ -842,13 +842,15 @@ static void nfs4_opendata_free(struct kref *kref)
842{ 842{
843 struct nfs4_opendata *p = container_of(kref, 843 struct nfs4_opendata *p = container_of(kref,
844 struct nfs4_opendata, kref); 844 struct nfs4_opendata, kref);
845 struct super_block *sb = p->dentry->d_sb;
845 846
846 nfs_free_seqid(p->o_arg.seqid); 847 nfs_free_seqid(p->o_arg.seqid);
847 if (p->state != NULL) 848 if (p->state != NULL)
848 nfs4_put_open_state(p->state); 849 nfs4_put_open_state(p->state);
849 nfs4_put_state_owner(p->owner); 850 nfs4_put_state_owner(p->owner);
850 dput(p->dir); 851 dput(p->dir);
851 path_put(&p->path); 852 dput(p->dentry);
853 nfs_sb_deactive(sb);
852 kfree(p); 854 kfree(p);
853} 855}
854 856
@@ -1130,7 +1132,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
1130{ 1132{
1131 struct nfs4_opendata *opendata; 1133 struct nfs4_opendata *opendata;
1132 1134
1133 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS); 1135 opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS);
1134 if (opendata == NULL) 1136 if (opendata == NULL)
1135 return ERR_PTR(-ENOMEM); 1137 return ERR_PTR(-ENOMEM);
1136 opendata->state = state; 1138 opendata->state = state;
@@ -1154,7 +1156,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1154 newstate = nfs4_opendata_to_nfs4_state(opendata); 1156 newstate = nfs4_opendata_to_nfs4_state(opendata);
1155 if (IS_ERR(newstate)) 1157 if (IS_ERR(newstate))
1156 return PTR_ERR(newstate); 1158 return PTR_ERR(newstate);
1157 nfs4_close_state(&opendata->path, newstate, fmode); 1159 nfs4_close_state(newstate, fmode);
1158 *res = newstate; 1160 *res = newstate;
1159 return 0; 1161 return 0;
1160} 1162}
@@ -1352,7 +1354,7 @@ static void nfs4_open_confirm_release(void *calldata)
1352 goto out_free; 1354 goto out_free;
1353 state = nfs4_opendata_to_nfs4_state(data); 1355 state = nfs4_opendata_to_nfs4_state(data);
1354 if (!IS_ERR(state)) 1356 if (!IS_ERR(state))
1355 nfs4_close_state(&data->path, state, data->o_arg.fmode); 1357 nfs4_close_state(state, data->o_arg.fmode);
1356out_free: 1358out_free:
1357 nfs4_opendata_put(data); 1359 nfs4_opendata_put(data);
1358} 1360}
@@ -1497,7 +1499,7 @@ static void nfs4_open_release(void *calldata)
1497 goto out_free; 1499 goto out_free;
1498 state = nfs4_opendata_to_nfs4_state(data); 1500 state = nfs4_opendata_to_nfs4_state(data);
1499 if (!IS_ERR(state)) 1501 if (!IS_ERR(state))
1500 nfs4_close_state(&data->path, state, data->o_arg.fmode); 1502 nfs4_close_state(state, data->o_arg.fmode);
1501out_free: 1503out_free:
1502 nfs4_opendata_put(data); 1504 nfs4_opendata_put(data);
1503} 1505}
@@ -1648,7 +1650,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
1648 return PTR_ERR(opendata); 1650 return PTR_ERR(opendata);
1649 ret = nfs4_open_recover(opendata, state); 1651 ret = nfs4_open_recover(opendata, state);
1650 if (ret == -ESTALE) 1652 if (ret == -ESTALE)
1651 d_drop(ctx->path.dentry); 1653 d_drop(ctx->dentry);
1652 nfs4_opendata_put(opendata); 1654 nfs4_opendata_put(opendata);
1653 return ret; 1655 return ret;
1654} 1656}
@@ -1706,7 +1708,7 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1706/* 1708/*
1707 * Returns a referenced nfs4_state 1709 * Returns a referenced nfs4_state
1708 */ 1710 */
1709static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1711static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
1710{ 1712{
1711 struct nfs4_state_owner *sp; 1713 struct nfs4_state_owner *sp;
1712 struct nfs4_state *state = NULL; 1714 struct nfs4_state *state = NULL;
@@ -1723,15 +1725,15 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1723 status = nfs4_recover_expired_lease(server); 1725 status = nfs4_recover_expired_lease(server);
1724 if (status != 0) 1726 if (status != 0)
1725 goto err_put_state_owner; 1727 goto err_put_state_owner;
1726 if (path->dentry->d_inode != NULL) 1728 if (dentry->d_inode != NULL)
1727 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); 1729 nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
1728 status = -ENOMEM; 1730 status = -ENOMEM;
1729 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL); 1731 opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL);
1730 if (opendata == NULL) 1732 if (opendata == NULL)
1731 goto err_put_state_owner; 1733 goto err_put_state_owner;
1732 1734
1733 if (path->dentry->d_inode != NULL) 1735 if (dentry->d_inode != NULL)
1734 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); 1736 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1735 1737
1736 status = _nfs4_proc_open(opendata); 1738 status = _nfs4_proc_open(opendata);
1737 if (status != 0) 1739 if (status != 0)
@@ -1769,14 +1771,14 @@ out_err:
1769} 1771}
1770 1772
1771 1773
1772static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) 1774static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
1773{ 1775{
1774 struct nfs4_exception exception = { }; 1776 struct nfs4_exception exception = { };
1775 struct nfs4_state *res; 1777 struct nfs4_state *res;
1776 int status; 1778 int status;
1777 1779
1778 do { 1780 do {
1779 status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res); 1781 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
1780 if (status == 0) 1782 if (status == 0)
1781 break; 1783 break;
1782 /* NOTE: BAD_SEQID means the server and client disagree about the 1784 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1873,7 +1875,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1873} 1875}
1874 1876
1875struct nfs4_closedata { 1877struct nfs4_closedata {
1876 struct path path;
1877 struct inode *inode; 1878 struct inode *inode;
1878 struct nfs4_state *state; 1879 struct nfs4_state *state;
1879 struct nfs_closeargs arg; 1880 struct nfs_closeargs arg;
@@ -1888,13 +1889,14 @@ static void nfs4_free_closedata(void *data)
1888{ 1889{
1889 struct nfs4_closedata *calldata = data; 1890 struct nfs4_closedata *calldata = data;
1890 struct nfs4_state_owner *sp = calldata->state->owner; 1891 struct nfs4_state_owner *sp = calldata->state->owner;
1892 struct super_block *sb = calldata->state->inode->i_sb;
1891 1893
1892 if (calldata->roc) 1894 if (calldata->roc)
1893 pnfs_roc_release(calldata->state->inode); 1895 pnfs_roc_release(calldata->state->inode);
1894 nfs4_put_open_state(calldata->state); 1896 nfs4_put_open_state(calldata->state);
1895 nfs_free_seqid(calldata->arg.seqid); 1897 nfs_free_seqid(calldata->arg.seqid);
1896 nfs4_put_state_owner(sp); 1898 nfs4_put_state_owner(sp);
1897 path_put(&calldata->path); 1899 nfs_sb_deactive(sb);
1898 kfree(calldata); 1900 kfree(calldata);
1899} 1901}
1900 1902
@@ -2014,7 +2016,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
2014 * 2016 *
2015 * NOTE: Caller must be holding the sp->so_owner semaphore! 2017 * NOTE: Caller must be holding the sp->so_owner semaphore!
2016 */ 2018 */
2017int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) 2019int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
2018{ 2020{
2019 struct nfs_server *server = NFS_SERVER(state->inode); 2021 struct nfs_server *server = NFS_SERVER(state->inode);
2020 struct nfs4_closedata *calldata; 2022 struct nfs4_closedata *calldata;
@@ -2050,8 +2052,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
2050 calldata->res.seqid = calldata->arg.seqid; 2052 calldata->res.seqid = calldata->arg.seqid;
2051 calldata->res.server = server; 2053 calldata->res.server = server;
2052 calldata->roc = roc; 2054 calldata->roc = roc;
2053 path_get(path); 2055 nfs_sb_active(calldata->inode->i_sb);
2054 calldata->path = *path;
2055 2056
2056 msg.rpc_argp = &calldata->arg; 2057 msg.rpc_argp = &calldata->arg;
2057 msg.rpc_resp = &calldata->res; 2058 msg.rpc_resp = &calldata->res;
@@ -2080,7 +2081,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
2080 struct nfs4_state *state; 2081 struct nfs4_state *state;
2081 2082
2082 /* Protect against concurrent sillydeletes */ 2083 /* Protect against concurrent sillydeletes */
2083 state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred); 2084 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
2084 if (IS_ERR(state)) 2085 if (IS_ERR(state))
2085 return ERR_CAST(state); 2086 return ERR_CAST(state);
2086 ctx->state = state; 2087 ctx->state = state;
@@ -2092,9 +2093,9 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
2092 if (ctx->state == NULL) 2093 if (ctx->state == NULL)
2093 return; 2094 return;
2094 if (is_sync) 2095 if (is_sync)
2095 nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); 2096 nfs4_close_sync(ctx->state, ctx->mode);
2096 else 2097 else
2097 nfs4_close_state(&ctx->path, ctx->state, ctx->mode); 2098 nfs4_close_state(ctx->state, ctx->mode);
2098} 2099}
2099 2100
2100static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 2101static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
@@ -2616,10 +2617,7 @@ static int
2616nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 2617nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2617 int flags, struct nfs_open_context *ctx) 2618 int flags, struct nfs_open_context *ctx)
2618{ 2619{
2619 struct path my_path = { 2620 struct dentry *de = dentry;
2620 .dentry = dentry,
2621 };
2622 struct path *path = &my_path;
2623 struct nfs4_state *state; 2621 struct nfs4_state *state;
2624 struct rpc_cred *cred = NULL; 2622 struct rpc_cred *cred = NULL;
2625 fmode_t fmode = 0; 2623 fmode_t fmode = 0;
@@ -2627,11 +2625,11 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2627 2625
2628 if (ctx != NULL) { 2626 if (ctx != NULL) {
2629 cred = ctx->cred; 2627 cred = ctx->cred;
2630 path = &ctx->path; 2628 de = ctx->dentry;
2631 fmode = ctx->mode; 2629 fmode = ctx->mode;
2632 } 2630 }
2633 sattr->ia_mode &= ~current_umask(); 2631 sattr->ia_mode &= ~current_umask();
2634 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); 2632 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
2635 d_drop(dentry); 2633 d_drop(dentry);
2636 if (IS_ERR(state)) { 2634 if (IS_ERR(state)) {
2637 status = PTR_ERR(state); 2635 status = PTR_ERR(state);
@@ -2642,7 +2640,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2642 if (ctx != NULL) 2640 if (ctx != NULL)
2643 ctx->state = state; 2641 ctx->state = state;
2644 else 2642 else
2645 nfs4_close_sync(path, state, fmode); 2643 nfs4_close_sync(state, fmode);
2646out: 2644out:
2647 return status; 2645 return status;
2648} 2646}
@@ -4294,7 +4292,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
4294 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, 4292 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
4295 sizeof(data->lsp->ls_stateid.data)); 4293 sizeof(data->lsp->ls_stateid.data));
4296 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 4294 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
4297 renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); 4295 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
4298 } 4296 }
4299out: 4297out:
4300 dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); 4298 dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e97dd219f84f..7acfe8843626 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -641,7 +641,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
641/* 641/*
642 * Close the current file. 642 * Close the current file.
643 */ 643 */
644static void __nfs4_close(struct path *path, struct nfs4_state *state, 644static void __nfs4_close(struct nfs4_state *state,
645 fmode_t fmode, gfp_t gfp_mask, int wait) 645 fmode_t fmode, gfp_t gfp_mask, int wait)
646{ 646{
647 struct nfs4_state_owner *owner = state->owner; 647 struct nfs4_state_owner *owner = state->owner;
@@ -685,18 +685,18 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
685 } else { 685 } else {
686 bool roc = pnfs_roc(state->inode); 686 bool roc = pnfs_roc(state->inode);
687 687
688 nfs4_do_close(path, state, gfp_mask, wait, roc); 688 nfs4_do_close(state, gfp_mask, wait, roc);
689 } 689 }
690} 690}
691 691
692void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) 692void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
693{ 693{
694 __nfs4_close(path, state, fmode, GFP_NOFS, 0); 694 __nfs4_close(state, fmode, GFP_NOFS, 0);
695} 695}
696 696
697void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) 697void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
698{ 698{
699 __nfs4_close(path, state, fmode, GFP_KERNEL, 1); 699 __nfs4_close(state, fmode, GFP_KERNEL, 1);
700} 700}
701 701
702/* 702/*
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 009855716286..18449f43c568 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -114,7 +114,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
114 if (!nfs_lock_request_dontget(req)) 114 if (!nfs_lock_request_dontget(req))
115 return 0; 115 return 0;
116 if (test_bit(PG_MAPPED, &req->wb_flags)) 116 if (test_bit(PG_MAPPED, &req->wb_flags))
117 radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); 117 radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
118 return 1; 118 return 1;
119} 119}
120 120
@@ -124,7 +124,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
124void nfs_clear_page_tag_locked(struct nfs_page *req) 124void nfs_clear_page_tag_locked(struct nfs_page *req)
125{ 125{
126 if (test_bit(PG_MAPPED, &req->wb_flags)) { 126 if (test_bit(PG_MAPPED, &req->wb_flags)) {
127 struct inode *inode = req->wb_context->path.dentry->d_inode; 127 struct inode *inode = req->wb_context->dentry->d_inode;
128 struct nfs_inode *nfsi = NFS_I(inode); 128 struct nfs_inode *nfsi = NFS_I(inode);
129 129
130 spin_lock(&inode->i_lock); 130 spin_lock(&inode->i_lock);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 20a7f952e244..a68679f538fc 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -144,7 +144,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
144 144
145static void nfs_readpage_release(struct nfs_page *req) 145static void nfs_readpage_release(struct nfs_page *req)
146{ 146{
147 struct inode *d_inode = req->wb_context->path.dentry->d_inode; 147 struct inode *d_inode = req->wb_context->dentry->d_inode;
148 148
149 if (PageUptodate(req->wb_page)) 149 if (PageUptodate(req->wb_page))
150 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 150 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
@@ -152,8 +152,8 @@ static void nfs_readpage_release(struct nfs_page *req)
152 unlock_page(req->wb_page); 152 unlock_page(req->wb_page);
153 153
154 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 154 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
155 req->wb_context->path.dentry->d_inode->i_sb->s_id, 155 req->wb_context->dentry->d_inode->i_sb->s_id,
156 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 156 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
157 req->wb_bytes, 157 req->wb_bytes,
158 (long long)req_offset(req)); 158 (long long)req_offset(req));
159 nfs_release_request(req); 159 nfs_release_request(req);
@@ -207,7 +207,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
207 unsigned int count, unsigned int offset, 207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg) 208 struct pnfs_layout_segment *lseg)
209{ 209{
210 struct inode *inode = req->wb_context->path.dentry->d_inode; 210 struct inode *inode = req->wb_context->dentry->d_inode;
211 211
212 data->req = req; 212 data->req = req;
213 data->inode = inode; 213 data->inode = inode;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce40e5c568ba..b961ceac66b4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2773,16 +2773,12 @@ static void nfs_referral_loop_unprotect(void)
2773static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, 2773static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2774 const char *export_path) 2774 const char *export_path)
2775{ 2775{
2776 struct nameidata *nd = NULL;
2777 struct mnt_namespace *ns_private; 2776 struct mnt_namespace *ns_private;
2778 struct super_block *s; 2777 struct super_block *s;
2779 struct dentry *dentry; 2778 struct dentry *dentry;
2779 struct path path;
2780 int ret; 2780 int ret;
2781 2781
2782 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2783 if (nd == NULL)
2784 return ERR_PTR(-ENOMEM);
2785
2786 ns_private = create_mnt_ns(root_mnt); 2782 ns_private = create_mnt_ns(root_mnt);
2787 ret = PTR_ERR(ns_private); 2783 ret = PTR_ERR(ns_private);
2788 if (IS_ERR(ns_private)) 2784 if (IS_ERR(ns_private))
@@ -2793,7 +2789,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2793 goto out_put_mnt_ns; 2789 goto out_put_mnt_ns;
2794 2790
2795 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, 2791 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
2796 export_path, LOOKUP_FOLLOW, nd); 2792 export_path, LOOKUP_FOLLOW, &path);
2797 2793
2798 nfs_referral_loop_unprotect(); 2794 nfs_referral_loop_unprotect();
2799 put_mnt_ns(ns_private); 2795 put_mnt_ns(ns_private);
@@ -2801,12 +2797,11 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2801 if (ret != 0) 2797 if (ret != 0)
2802 goto out_err; 2798 goto out_err;
2803 2799
2804 s = nd->path.mnt->mnt_sb; 2800 s = path.mnt->mnt_sb;
2805 atomic_inc(&s->s_active); 2801 atomic_inc(&s->s_active);
2806 dentry = dget(nd->path.dentry); 2802 dentry = dget(path.dentry);
2807 2803
2808 path_put(&nd->path); 2804 path_put(&path);
2809 kfree(nd);
2810 down_write(&s->s_umount); 2805 down_write(&s->s_umount);
2811 return dentry; 2806 return dentry;
2812out_put_mnt_ns: 2807out_put_mnt_ns:
@@ -2814,7 +2809,6 @@ out_put_mnt_ns:
2814out_mntput: 2809out_mntput:
2815 mntput(root_mnt); 2810 mntput(root_mnt);
2816out_err: 2811out_err:
2817 kfree(nd);
2818 return ERR_PTR(ret); 2812 return ERR_PTR(ret);
2819} 2813}
2820 2814
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 727168059684..08579312c57b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -409,7 +409,7 @@ out:
409 */ 409 */
410static void nfs_inode_remove_request(struct nfs_page *req) 410static void nfs_inode_remove_request(struct nfs_page *req)
411{ 411{
412 struct inode *inode = req->wb_context->path.dentry->d_inode; 412 struct inode *inode = req->wb_context->dentry->d_inode;
413 struct nfs_inode *nfsi = NFS_I(inode); 413 struct nfs_inode *nfsi = NFS_I(inode);
414 414
415 BUG_ON (!NFS_WBACK_BUSY(req)); 415 BUG_ON (!NFS_WBACK_BUSY(req));
@@ -438,7 +438,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
438static void 438static void
439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
440{ 440{
441 struct inode *inode = req->wb_context->path.dentry->d_inode; 441 struct inode *inode = req->wb_context->dentry->d_inode;
442 struct nfs_inode *nfsi = NFS_I(inode); 442 struct nfs_inode *nfsi = NFS_I(inode);
443 443
444 spin_lock(&inode->i_lock); 444 spin_lock(&inode->i_lock);
@@ -852,13 +852,13 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
852 struct pnfs_layout_segment *lseg, 852 struct pnfs_layout_segment *lseg,
853 int how) 853 int how)
854{ 854{
855 struct inode *inode = req->wb_context->path.dentry->d_inode; 855 struct inode *inode = req->wb_context->dentry->d_inode;
856 856
857 /* Set up the RPC argument and reply structs 857 /* Set up the RPC argument and reply structs
858 * NB: take care not to mess about with data->commit et al. */ 858 * NB: take care not to mess about with data->commit et al. */
859 859
860 data->req = req; 860 data->req = req;
861 data->inode = inode = req->wb_context->path.dentry->d_inode; 861 data->inode = inode = req->wb_context->dentry->d_inode;
862 data->cred = req->wb_context->cred; 862 data->cred = req->wb_context->cred;
863 data->lseg = get_lseg(lseg); 863 data->lseg = get_lseg(lseg);
864 864
@@ -1053,9 +1053,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1053 1053
1054 dprintk("NFS: %5u write(%s/%lld %d@%lld)", 1054 dprintk("NFS: %5u write(%s/%lld %d@%lld)",
1055 task->tk_pid, 1055 task->tk_pid,
1056 data->req->wb_context->path.dentry->d_inode->i_sb->s_id, 1056 data->req->wb_context->dentry->d_inode->i_sb->s_id,
1057 (long long) 1057 (long long)
1058 NFS_FILEID(data->req->wb_context->path.dentry->d_inode), 1058 NFS_FILEID(data->req->wb_context->dentry->d_inode),
1059 data->req->wb_bytes, (long long)req_offset(data->req)); 1059 data->req->wb_bytes, (long long)req_offset(data->req));
1060 1060
1061 nfs_writeback_done(task, data); 1061 nfs_writeback_done(task, data);
@@ -1148,8 +1148,8 @@ static void nfs_writeback_release_full(void *calldata)
1148 1148
1149 dprintk("NFS: %5u write (%s/%lld %d@%lld)", 1149 dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1150 data->task.tk_pid, 1150 data->task.tk_pid,
1151 req->wb_context->path.dentry->d_inode->i_sb->s_id, 1151 req->wb_context->dentry->d_inode->i_sb->s_id,
1152 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 1152 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1153 req->wb_bytes, 1153 req->wb_bytes,
1154 (long long)req_offset(req)); 1154 (long long)req_offset(req));
1155 1155
@@ -1347,7 +1347,7 @@ void nfs_init_commit(struct nfs_write_data *data,
1347 struct pnfs_layout_segment *lseg) 1347 struct pnfs_layout_segment *lseg)
1348{ 1348{
1349 struct nfs_page *first = nfs_list_entry(head->next); 1349 struct nfs_page *first = nfs_list_entry(head->next);
1350 struct inode *inode = first->wb_context->path.dentry->d_inode; 1350 struct inode *inode = first->wb_context->dentry->d_inode;
1351 1351
1352 /* Set up the RPC argument and reply structs 1352 /* Set up the RPC argument and reply structs
1353 * NB: take care not to mess about with data->commit et al. */ 1353 * NB: take care not to mess about with data->commit et al. */
@@ -1435,8 +1435,8 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1435 nfs_clear_request_commit(req); 1435 nfs_clear_request_commit(req);
1436 1436
1437 dprintk("NFS: commit (%s/%lld %d@%lld)", 1437 dprintk("NFS: commit (%s/%lld %d@%lld)",
1438 req->wb_context->path.dentry->d_inode->i_sb->s_id, 1438 req->wb_context->dentry->d_sb->s_id,
1439 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 1439 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1440 req->wb_bytes, 1440 req->wb_bytes,
1441 (long long)req_offset(req)); 1441 (long long)req_offset(req));
1442 if (status < 0) { 1442 if (status < 0) {
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ffb59ef6f82f..29d77f60585b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -191,52 +191,42 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen,
191} 191}
192 192
193static int 193static int
194nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) 194nfsd4_list_rec_dir(recdir_func *f)
195{ 195{
196 const struct cred *original_cred; 196 const struct cred *original_cred;
197 struct file *filp; 197 struct dentry *dir = rec_file->f_path.dentry;
198 LIST_HEAD(names); 198 LIST_HEAD(names);
199 struct name_list *entry;
200 struct dentry *dentry;
201 int status; 199 int status;
202 200
203 if (!rec_file)
204 return 0;
205
206 status = nfs4_save_creds(&original_cred); 201 status = nfs4_save_creds(&original_cred);
207 if (status < 0) 202 if (status < 0)
208 return status; 203 return status;
209 204
210 filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY, 205 status = vfs_llseek(rec_file, 0, SEEK_SET);
211 current_cred()); 206 if (status < 0) {
212 status = PTR_ERR(filp); 207 nfs4_reset_creds(original_cred);
213 if (IS_ERR(filp)) 208 return status;
214 goto out; 209 }
215 status = vfs_readdir(filp, nfsd4_build_namelist, &names); 210
216 fput(filp); 211 status = vfs_readdir(rec_file, nfsd4_build_namelist, &names);
217 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); 212 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
218 while (!list_empty(&names)) { 213 while (!list_empty(&names)) {
214 struct name_list *entry;
219 entry = list_entry(names.next, struct name_list, list); 215 entry = list_entry(names.next, struct name_list, list);
220 216 if (!status) {
221 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); 217 struct dentry *dentry;
222 if (IS_ERR(dentry)) { 218 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
223 status = PTR_ERR(dentry); 219 if (IS_ERR(dentry)) {
224 break; 220 status = PTR_ERR(dentry);
221 break;
222 }
223 status = f(dir, dentry);
224 dput(dentry);
225 } 225 }
226 status = f(dir, dentry);
227 dput(dentry);
228 if (status)
229 break;
230 list_del(&entry->list); 226 list_del(&entry->list);
231 kfree(entry); 227 kfree(entry);
232 } 228 }
233 mutex_unlock(&dir->d_inode->i_mutex); 229 mutex_unlock(&dir->d_inode->i_mutex);
234out:
235 while (!list_empty(&names)) {
236 entry = list_entry(names.next, struct name_list, list);
237 list_del(&entry->list);
238 kfree(entry);
239 }
240 nfs4_reset_creds(original_cred); 230 nfs4_reset_creds(original_cred);
241 return status; 231 return status;
242} 232}
@@ -322,7 +312,7 @@ nfsd4_recdir_purge_old(void) {
322 status = mnt_want_write(rec_file->f_path.mnt); 312 status = mnt_want_write(rec_file->f_path.mnt);
323 if (status) 313 if (status)
324 goto out; 314 goto out;
325 status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old); 315 status = nfsd4_list_rec_dir(purge_old);
326 if (status == 0) 316 if (status == 0)
327 vfs_fsync(rec_file, 0); 317 vfs_fsync(rec_file, 0);
328 mnt_drop_write(rec_file->f_path.mnt); 318 mnt_drop_write(rec_file->f_path.mnt);
@@ -352,7 +342,7 @@ nfsd4_recdir_load(void) {
352 if (!rec_file) 342 if (!rec_file)
353 return 0; 343 return 0;
354 344
355 status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir); 345 status = nfsd4_list_rec_dir(load_recdir);
356 if (status) 346 if (status)
357 printk("nfsd4: failed loading clients from recovery" 347 printk("nfsd4: failed loading clients from recovery"
358 " directory %s\n", rec_file->f_path.dentry->d_name.name); 348 " directory %s\n", rec_file->f_path.dentry->d_name.name);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index d7eeca62febd..26601529dc17 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -27,7 +27,7 @@
27#include "nilfs.h" 27#include "nilfs.h"
28#include "segment.h" 28#include "segment.h"
29 29
30int nilfs_sync_file(struct file *file, int datasync) 30int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
31{ 31{
32 /* 32 /*
33 * Called from fsync() system call 33 * Called from fsync() system call
@@ -40,8 +40,15 @@ int nilfs_sync_file(struct file *file, int datasync)
40 struct inode *inode = file->f_mapping->host; 40 struct inode *inode = file->f_mapping->host;
41 int err; 41 int err;
42 42
43 if (!nilfs_inode_dirty(inode)) 43 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
44 if (err)
45 return err;
46 mutex_lock(&inode->i_mutex);
47
48 if (!nilfs_inode_dirty(inode)) {
49 mutex_unlock(&inode->i_mutex);
44 return 0; 50 return 0;
51 }
45 52
46 if (datasync) 53 if (datasync)
47 err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, 54 err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0,
@@ -49,6 +56,7 @@ int nilfs_sync_file(struct file *file, int datasync)
49 else 56 else
50 err = nilfs_construct_segment(inode->i_sb); 57 err = nilfs_construct_segment(inode->i_sb);
51 58
59 mutex_unlock(&inode->i_mutex);
52 return err; 60 return err;
53} 61}
54 62
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b9b45fc2903e..666628b395f1 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -259,8 +259,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
259 return 0; 259 return 0;
260 260
261 /* Needs synchronization with the cleaner */ 261 /* Needs synchronization with the cleaner */
262 size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 262 size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
263 offset, nr_segs, nilfs_get_block, NULL); 263 nilfs_get_block);
264 264
265 /* 265 /*
266 * In case of error extending write may have instantiated a few 266 * In case of error extending write may have instantiated a few
@@ -778,6 +778,8 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
778 778
779 if ((iattr->ia_valid & ATTR_SIZE) && 779 if ((iattr->ia_valid & ATTR_SIZE) &&
780 iattr->ia_size != i_size_read(inode)) { 780 iattr->ia_size != i_size_read(inode)) {
781 inode_dio_wait(inode);
782
781 err = vmtruncate(inode, iattr->ia_size); 783 err = vmtruncate(inode, iattr->ia_size);
782 if (unlikely(err)) 784 if (unlikely(err))
783 goto out_err; 785 goto out_err;
@@ -799,14 +801,14 @@ out_err:
799 return err; 801 return err;
800} 802}
801 803
802int nilfs_permission(struct inode *inode, int mask, unsigned int flags) 804int nilfs_permission(struct inode *inode, int mask)
803{ 805{
804 struct nilfs_root *root = NILFS_I(inode)->i_root; 806 struct nilfs_root *root = NILFS_I(inode)->i_root;
805 if ((mask & MAY_WRITE) && root && 807 if ((mask & MAY_WRITE) && root &&
806 root->cno != NILFS_CPTREE_CURRENT_CNO) 808 root->cno != NILFS_CPTREE_CURRENT_CNO)
807 return -EROFS; /* snapshot is not writable */ 809 return -EROFS; /* snapshot is not writable */
808 810
809 return generic_permission(inode, mask, flags, NULL); 811 return generic_permission(inode, mask);
810} 812}
811 813
812int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) 814int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 546849b3e88f..a3141990061e 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -72,12 +72,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
72 return ERR_PTR(-ENAMETOOLONG); 72 return ERR_PTR(-ENAMETOOLONG);
73 73
74 ino = nilfs_inode_by_name(dir, &dentry->d_name); 74 ino = nilfs_inode_by_name(dir, &dentry->d_name);
75 inode = NULL; 75 inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL;
76 if (ino) {
77 inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
78 if (IS_ERR(inode))
79 return ERR_CAST(inode);
80 }
81 return d_splice_alias(inode, dentry); 76 return d_splice_alias(inode, dentry);
82} 77}
83 78
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f02b9ad43a21..255d5e1c03b7 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -235,7 +235,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
235 struct page *, struct inode *); 235 struct page *, struct inode *);
236 236
237/* file.c */ 237/* file.c */
238extern int nilfs_sync_file(struct file *, int); 238extern int nilfs_sync_file(struct file *, loff_t, loff_t, int);
239 239
240/* ioctl.c */ 240/* ioctl.c */
241long nilfs_ioctl(struct file *, unsigned int, unsigned long); 241long nilfs_ioctl(struct file *, unsigned int, unsigned long);
@@ -264,7 +264,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);
264extern void nilfs_truncate(struct inode *); 264extern void nilfs_truncate(struct inode *);
265extern void nilfs_evict_inode(struct inode *); 265extern void nilfs_evict_inode(struct inode *);
266extern int nilfs_setattr(struct dentry *, struct iattr *); 266extern int nilfs_setattr(struct dentry *, struct iattr *);
267int nilfs_permission(struct inode *inode, int mask, unsigned int flags); 267int nilfs_permission(struct inode *inode, int mask);
268int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); 268int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
269extern int nilfs_inode_dirty(struct inode *); 269extern int nilfs_inode_dirty(struct inode *);
270int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); 270int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 0f48e7c5d9e1..99e36107ff60 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1527,13 +1527,20 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
1527 * this problem for now. We do write the $BITMAP attribute if it is present 1527 * this problem for now. We do write the $BITMAP attribute if it is present
1528 * which is the important one for a directory so things are not too bad. 1528 * which is the important one for a directory so things are not too bad.
1529 */ 1529 */
1530static int ntfs_dir_fsync(struct file *filp, int datasync) 1530static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
1531 int datasync)
1531{ 1532{
1532 struct inode *bmp_vi, *vi = filp->f_mapping->host; 1533 struct inode *bmp_vi, *vi = filp->f_mapping->host;
1533 int err, ret; 1534 int err, ret;
1534 ntfs_attr na; 1535 ntfs_attr na;
1535 1536
1536 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 1537 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
1538
1539 err = filemap_write_and_wait_range(vi->i_mapping, start, end);
1540 if (err)
1541 return err;
1542 mutex_lock(&vi->i_mutex);
1543
1537 BUG_ON(!S_ISDIR(vi->i_mode)); 1544 BUG_ON(!S_ISDIR(vi->i_mode));
1538 /* If the bitmap attribute inode is in memory sync it, too. */ 1545 /* If the bitmap attribute inode is in memory sync it, too. */
1539 na.mft_no = vi->i_ino; 1546 na.mft_no = vi->i_ino;
@@ -1555,6 +1562,7 @@ static int ntfs_dir_fsync(struct file *filp, int datasync)
1555 else 1562 else
1556 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " 1563 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
1557 "%u.", datasync ? "data" : "", vi->i_ino, -ret); 1564 "%u.", datasync ? "data" : "", vi->i_ino, -ret);
1565 mutex_unlock(&vi->i_mutex);
1558 return ret; 1566 return ret;
1559} 1567}
1560 1568
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f4b1057abdd2..c587e2d27183 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1832,9 +1832,8 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1832 * fails again. 1832 * fails again.
1833 */ 1833 */
1834 if (unlikely(NInoTruncateFailed(ni))) { 1834 if (unlikely(NInoTruncateFailed(ni))) {
1835 down_write(&vi->i_alloc_sem); 1835 inode_dio_wait(vi);
1836 err = ntfs_truncate(vi); 1836 err = ntfs_truncate(vi);
1837 up_write(&vi->i_alloc_sem);
1838 if (err || NInoTruncateFailed(ni)) { 1837 if (err || NInoTruncateFailed(ni)) {
1839 if (!err) 1838 if (!err)
1840 err = -EIO; 1839 err = -EIO;
@@ -2153,12 +2152,19 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2153 * with this inode but since we have no simple way of getting to them we ignore 2152 * with this inode but since we have no simple way of getting to them we ignore
2154 * this problem for now. 2153 * this problem for now.
2155 */ 2154 */
2156static int ntfs_file_fsync(struct file *filp, int datasync) 2155static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
2156 int datasync)
2157{ 2157{
2158 struct inode *vi = filp->f_mapping->host; 2158 struct inode *vi = filp->f_mapping->host;
2159 int err, ret = 0; 2159 int err, ret = 0;
2160 2160
2161 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 2161 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
2162
2163 err = filemap_write_and_wait_range(vi->i_mapping, start, end);
2164 if (err)
2165 return err;
2166 mutex_lock(&vi->i_mutex);
2167
2162 BUG_ON(S_ISDIR(vi->i_mode)); 2168 BUG_ON(S_ISDIR(vi->i_mode));
2163 if (!datasync || !NInoNonResident(NTFS_I(vi))) 2169 if (!datasync || !NInoNonResident(NTFS_I(vi)))
2164 ret = __ntfs_write_inode(vi, 1); 2170 ret = __ntfs_write_inode(vi, 1);
@@ -2176,6 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, int datasync)
2176 else 2182 else
2177 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " 2183 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
2178 "%u.", datasync ? "data" : "", vi->i_ino, -ret); 2184 "%u.", datasync ? "data" : "", vi->i_ino, -ret);
2185 mutex_unlock(&vi->i_mutex);
2179 return ret; 2186 return ret;
2180} 2187}
2181 2188
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index c05d6dcf77a4..1371487da955 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2357,12 +2357,7 @@ static const char *es = " Leaving inconsistent metadata. Unmount and run "
2357 * 2357 *
2358 * Returns 0 on success or -errno on error. 2358 * Returns 0 on success or -errno on error.
2359 * 2359 *
2360 * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for 2360 * Called with ->i_mutex held.
2361 * writing. The only case in the kernel where ->i_alloc_sem is not held is
2362 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
2363 * with the current i_size as the offset. The analogous place in NTFS is in
2364 * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
2365 * without holding ->i_alloc_sem.
2366 */ 2361 */
2367int ntfs_truncate(struct inode *vi) 2362int ntfs_truncate(struct inode *vi)
2368{ 2363{
@@ -2887,8 +2882,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
2887 * We also abort all changes of user, group, and mode as we do not implement 2882 * We also abort all changes of user, group, and mode as we do not implement
2888 * the NTFS ACLs yet. 2883 * the NTFS ACLs yet.
2889 * 2884 *
2890 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also 2885 * Called with ->i_mutex held.
2891 * called with ->i_alloc_sem held for writing.
2892 */ 2886 */
2893int ntfs_setattr(struct dentry *dentry, struct iattr *attr) 2887int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2894{ 2888{
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index e913ad130fdd..1cee970eb55a 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -290,14 +290,14 @@ static int ocfs2_set_acl(handle_t *handle,
290 return ret; 290 return ret;
291} 291}
292 292
293int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags) 293int ocfs2_check_acl(struct inode *inode, int mask)
294{ 294{
295 struct ocfs2_super *osb; 295 struct ocfs2_super *osb;
296 struct buffer_head *di_bh = NULL; 296 struct buffer_head *di_bh = NULL;
297 struct posix_acl *acl; 297 struct posix_acl *acl;
298 int ret = -EAGAIN; 298 int ret = -EAGAIN;
299 299
300 if (flags & IPERM_FLAG_RCU) 300 if (mask & MAY_NOT_BLOCK)
301 return -ECHILD; 301 return -ECHILD;
302 302
303 osb = OCFS2_SB(inode->i_sb); 303 osb = OCFS2_SB(inode->i_sb);
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 4fe7c9cf4bfb..5c5d31f05853 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29extern int ocfs2_check_acl(struct inode *, int, unsigned int); 29extern int ocfs2_check_acl(struct inode *, int);
30extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
32 struct buffer_head *, struct buffer_head *, 32 struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index ac97bca282d2..c1efe939c774 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -551,9 +551,8 @@ bail:
551 551
552/* 552/*
553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
554 * particularly interested in the aio/dio case. Like the core uses 554 * particularly interested in the aio/dio case. We use the rw_lock DLM lock
555 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from 555 * to protect io on one node from truncation on another.
556 * truncation on another.
557 */ 556 */
558static void ocfs2_dio_end_io(struct kiocb *iocb, 557static void ocfs2_dio_end_io(struct kiocb *iocb,
559 loff_t offset, 558 loff_t offset,
@@ -568,10 +567,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
568 /* this io's submitter should not have unlocked this before we could */ 567 /* this io's submitter should not have unlocked this before we could */
569 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 568 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
570 569
571 if (ocfs2_iocb_is_sem_locked(iocb)) { 570 if (ocfs2_iocb_is_sem_locked(iocb))
572 up_read(&inode->i_alloc_sem);
573 ocfs2_iocb_clear_sem_locked(iocb); 571 ocfs2_iocb_clear_sem_locked(iocb);
574 }
575 572
576 ocfs2_iocb_clear_rw_locked(iocb); 573 ocfs2_iocb_clear_rw_locked(iocb);
577 574
@@ -580,6 +577,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
580 577
581 if (is_async) 578 if (is_async)
582 aio_complete(iocb, ret, 0); 579 aio_complete(iocb, ret, 0);
580 inode_dio_done(inode);
583} 581}
584 582
585/* 583/*
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index b1e35a392ca5..0fc2bd34039d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -171,7 +171,8 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file)
171 return 0; 171 return 0;
172} 172}
173 173
174static int ocfs2_sync_file(struct file *file, int datasync) 174static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
175 int datasync)
175{ 176{
176 int err = 0; 177 int err = 0;
177 journal_t *journal; 178 journal_t *journal;
@@ -184,6 +185,16 @@ static int ocfs2_sync_file(struct file *file, int datasync)
184 file->f_path.dentry->d_name.name, 185 file->f_path.dentry->d_name.name,
185 (unsigned long long)datasync); 186 (unsigned long long)datasync);
186 187
188 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
189 if (err)
190 return err;
191
192 /*
193 * Probably don't need the i_mutex at all in here, just putting it here
194 * to be consistent with how fsync used to be called, someone more
195 * familiar with the fs could possibly remove it.
196 */
197 mutex_lock(&inode->i_mutex);
187 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { 198 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
188 /* 199 /*
189 * We still have to flush drive's caches to get data to the 200 * We still have to flush drive's caches to get data to the
@@ -200,6 +211,7 @@ static int ocfs2_sync_file(struct file *file, int datasync)
200bail: 211bail:
201 if (err) 212 if (err)
202 mlog_errno(err); 213 mlog_errno(err);
214 mutex_unlock(&inode->i_mutex);
203 215
204 return (err < 0) ? -EIO : 0; 216 return (err < 0) ? -EIO : 0;
205} 217}
@@ -1142,6 +1154,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1142 if (status) 1154 if (status)
1143 goto bail_unlock; 1155 goto bail_unlock;
1144 1156
1157 inode_dio_wait(inode);
1158
1145 if (i_size_read(inode) > attr->ia_size) { 1159 if (i_size_read(inode) > attr->ia_size) {
1146 if (ocfs2_should_order_data(inode)) { 1160 if (ocfs2_should_order_data(inode)) {
1147 status = ocfs2_begin_ordered_truncate(inode, 1161 status = ocfs2_begin_ordered_truncate(inode,
@@ -1279,11 +1293,11 @@ bail:
1279 return err; 1293 return err;
1280} 1294}
1281 1295
1282int ocfs2_permission(struct inode *inode, int mask, unsigned int flags) 1296int ocfs2_permission(struct inode *inode, int mask)
1283{ 1297{
1284 int ret; 1298 int ret;
1285 1299
1286 if (flags & IPERM_FLAG_RCU) 1300 if (mask & MAY_NOT_BLOCK)
1287 return -ECHILD; 1301 return -ECHILD;
1288 1302
1289 ret = ocfs2_inode_lock(inode, NULL, 0); 1303 ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1293,7 +1307,7 @@ int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
1293 goto out; 1307 goto out;
1294 } 1308 }
1295 1309
1296 ret = generic_permission(inode, mask, flags, ocfs2_check_acl); 1310 ret = generic_permission(inode, mask);
1297 1311
1298 ocfs2_inode_unlock(inode, 0); 1312 ocfs2_inode_unlock(inode, 0);
1299out: 1313out:
@@ -2236,9 +2250,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2236 ocfs2_iocb_clear_sem_locked(iocb); 2250 ocfs2_iocb_clear_sem_locked(iocb);
2237 2251
2238relock: 2252relock:
2239 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2253 /* to match setattr's i_mutex -> rw_lock ordering */
2240 if (direct_io) { 2254 if (direct_io) {
2241 down_read(&inode->i_alloc_sem);
2242 have_alloc_sem = 1; 2255 have_alloc_sem = 1;
2243 /* communicate with ocfs2_dio_end_io */ 2256 /* communicate with ocfs2_dio_end_io */
2244 ocfs2_iocb_set_sem_locked(iocb); 2257 ocfs2_iocb_set_sem_locked(iocb);
@@ -2290,7 +2303,6 @@ relock:
2290 */ 2303 */
2291 if (direct_io && !can_do_direct) { 2304 if (direct_io && !can_do_direct) {
2292 ocfs2_rw_unlock(inode, rw_level); 2305 ocfs2_rw_unlock(inode, rw_level);
2293 up_read(&inode->i_alloc_sem);
2294 2306
2295 have_alloc_sem = 0; 2307 have_alloc_sem = 0;
2296 rw_level = -1; 2308 rw_level = -1;
@@ -2361,8 +2373,7 @@ out_dio:
2361 /* 2373 /*
2362 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io 2374 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2363 * function pointer which is called when o_direct io completes so that 2375 * function pointer which is called when o_direct io completes so that
2364 * it can unlock our rw lock. (it's the clustered equivalent of 2376 * it can unlock our rw lock.
2365 * i_alloc_sem; protects truncate from racing with pending ios).
2366 * Unfortunately there are error cases which call end_io and others 2377 * Unfortunately there are error cases which call end_io and others
2367 * that don't. so we don't have to unlock the rw_lock if either an 2378 * that don't. so we don't have to unlock the rw_lock if either an
2368 * async dio is going to do it in the future or an end_io after an 2379 * async dio is going to do it in the future or an end_io after an
@@ -2378,10 +2389,8 @@ out:
2378 ocfs2_rw_unlock(inode, rw_level); 2389 ocfs2_rw_unlock(inode, rw_level);
2379 2390
2380out_sems: 2391out_sems:
2381 if (have_alloc_sem) { 2392 if (have_alloc_sem)
2382 up_read(&inode->i_alloc_sem);
2383 ocfs2_iocb_clear_sem_locked(iocb); 2393 ocfs2_iocb_clear_sem_locked(iocb);
2384 }
2385 2394
2386 mutex_unlock(&inode->i_mutex); 2395 mutex_unlock(&inode->i_mutex);
2387 2396
@@ -2531,7 +2540,6 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2531 * need locks to protect pending reads from racing with truncate. 2540 * need locks to protect pending reads from racing with truncate.
2532 */ 2541 */
2533 if (filp->f_flags & O_DIRECT) { 2542 if (filp->f_flags & O_DIRECT) {
2534 down_read(&inode->i_alloc_sem);
2535 have_alloc_sem = 1; 2543 have_alloc_sem = 1;
2536 ocfs2_iocb_set_sem_locked(iocb); 2544 ocfs2_iocb_set_sem_locked(iocb);
2537 2545
@@ -2574,10 +2582,9 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2574 } 2582 }
2575 2583
2576bail: 2584bail:
2577 if (have_alloc_sem) { 2585 if (have_alloc_sem)
2578 up_read(&inode->i_alloc_sem);
2579 ocfs2_iocb_clear_sem_locked(iocb); 2586 ocfs2_iocb_clear_sem_locked(iocb);
2580 } 2587
2581 if (rw_level != -1) 2588 if (rw_level != -1)
2582 ocfs2_rw_unlock(inode, rw_level); 2589 ocfs2_rw_unlock(inode, rw_level);
2583 2590
@@ -2593,12 +2600,14 @@ const struct inode_operations ocfs2_file_iops = {
2593 .listxattr = ocfs2_listxattr, 2600 .listxattr = ocfs2_listxattr,
2594 .removexattr = generic_removexattr, 2601 .removexattr = generic_removexattr,
2595 .fiemap = ocfs2_fiemap, 2602 .fiemap = ocfs2_fiemap,
2603 .check_acl = ocfs2_check_acl,
2596}; 2604};
2597 2605
2598const struct inode_operations ocfs2_special_file_iops = { 2606const struct inode_operations ocfs2_special_file_iops = {
2599 .setattr = ocfs2_setattr, 2607 .setattr = ocfs2_setattr,
2600 .getattr = ocfs2_getattr, 2608 .getattr = ocfs2_getattr,
2601 .permission = ocfs2_permission, 2609 .permission = ocfs2_permission,
2610 .check_acl = ocfs2_check_acl,
2602}; 2611};
2603 2612
2604/* 2613/*
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index f5afbbef6703..97bf761c9e7c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
63 struct kstat *stat); 63 struct kstat *stat);
64int ocfs2_permission(struct inode *inode, int mask, unsigned int flags); 64int ocfs2_permission(struct inode *inode, int mask);
65 65
66int ocfs2_should_update_atime(struct inode *inode, 66int ocfs2_should_update_atime(struct inode *inode,
67 struct vfsmount *vfsmnt); 67 struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e5d738cd9cc0..33889dc52dd7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2498,4 +2498,5 @@ const struct inode_operations ocfs2_dir_iops = {
2498 .listxattr = ocfs2_listxattr, 2498 .listxattr = ocfs2_listxattr,
2499 .removexattr = generic_removexattr, 2499 .removexattr = generic_removexattr,
2500 .fiemap = ocfs2_fiemap, 2500 .fiemap = ocfs2_fiemap,
2501 .check_acl = ocfs2_check_acl,
2501}; 2502};
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ebfd3825f12a..cf7823382664 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4368,25 +4368,6 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
4368 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 4368 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
4369} 4369}
4370 4370
4371/* copied from user_path_parent. */
4372static int ocfs2_user_path_parent(const char __user *path,
4373 struct nameidata *nd, char **name)
4374{
4375 char *s = getname(path);
4376 int error;
4377
4378 if (IS_ERR(s))
4379 return PTR_ERR(s);
4380
4381 error = kern_path_parent(s, nd);
4382 if (error)
4383 putname(s);
4384 else
4385 *name = s;
4386
4387 return error;
4388}
4389
4390/** 4371/**
4391 * ocfs2_vfs_reflink - Create a reference-counted link 4372 * ocfs2_vfs_reflink - Create a reference-counted link
4392 * 4373 *
@@ -4460,10 +4441,8 @@ int ocfs2_reflink_ioctl(struct inode *inode,
4460 bool preserve) 4441 bool preserve)
4461{ 4442{
4462 struct dentry *new_dentry; 4443 struct dentry *new_dentry;
4463 struct nameidata nd; 4444 struct path old_path, new_path;
4464 struct path old_path;
4465 int error; 4445 int error;
4466 char *to = NULL;
4467 4446
4468 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) 4447 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
4469 return -EOPNOTSUPP; 4448 return -EOPNOTSUPP;
@@ -4474,39 +4453,33 @@ int ocfs2_reflink_ioctl(struct inode *inode,
4474 return error; 4453 return error;
4475 } 4454 }
4476 4455
4477 error = ocfs2_user_path_parent(newname, &nd, &to); 4456 new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0);
4478 if (error) { 4457 error = PTR_ERR(new_dentry);
4458 if (IS_ERR(new_dentry)) {
4479 mlog_errno(error); 4459 mlog_errno(error);
4480 goto out; 4460 goto out;
4481 } 4461 }
4482 4462
4483 error = -EXDEV; 4463 error = -EXDEV;
4484 if (old_path.mnt != nd.path.mnt) 4464 if (old_path.mnt != new_path.mnt) {
4485 goto out_release;
4486 new_dentry = lookup_create(&nd, 0);
4487 error = PTR_ERR(new_dentry);
4488 if (IS_ERR(new_dentry)) {
4489 mlog_errno(error); 4465 mlog_errno(error);
4490 goto out_unlock; 4466 goto out_dput;
4491 } 4467 }
4492 4468
4493 error = mnt_want_write(nd.path.mnt); 4469 error = mnt_want_write(new_path.mnt);
4494 if (error) { 4470 if (error) {
4495 mlog_errno(error); 4471 mlog_errno(error);
4496 goto out_dput; 4472 goto out_dput;
4497 } 4473 }
4498 4474
4499 error = ocfs2_vfs_reflink(old_path.dentry, 4475 error = ocfs2_vfs_reflink(old_path.dentry,
4500 nd.path.dentry->d_inode, 4476 new_path.dentry->d_inode,
4501 new_dentry, preserve); 4477 new_dentry, preserve);
4502 mnt_drop_write(nd.path.mnt); 4478 mnt_drop_write(new_path.mnt);
4503out_dput: 4479out_dput:
4504 dput(new_dentry); 4480 dput(new_dentry);
4505out_unlock: 4481 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
4506 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 4482 path_put(&new_path);
4507out_release:
4508 path_put(&nd.path);
4509 putname(to);
4510out: 4483out:
4511 path_put(&old_path); 4484 path_put(&old_path);
4512 4485
diff --git a/fs/open.c b/fs/open.c
index b52cf013ffa1..739b751aa73e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -793,7 +793,7 @@ out:
793 return nd->intent.open.file; 793 return nd->intent.open.file;
794out_err: 794out_err:
795 release_open_intent(nd); 795 release_open_intent(nd);
796 nd->intent.open.file = (struct file *)dentry; 796 nd->intent.open.file = ERR_CAST(dentry);
797 goto out; 797 goto out;
798} 798}
799EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 799EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c47719aaadef..91fb655a5cbf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -673,7 +673,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
673 p->m.private = p; 673 p->m.private = p;
674 p->ns = ns; 674 p->ns = ns;
675 p->root = root; 675 p->root = root;
676 p->event = ns->event; 676 p->m.poll_event = ns->event;
677 677
678 return 0; 678 return 0;
679 679
@@ -2167,9 +2167,9 @@ static const struct file_operations proc_fd_operations = {
2167 * /proc/pid/fd needs a special permission handler so that a process can still 2167 * /proc/pid/fd needs a special permission handler so that a process can still
2168 * access /proc/self/fd after it has executed a setuid(). 2168 * access /proc/self/fd after it has executed a setuid().
2169 */ 2169 */
2170static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) 2170static int proc_fd_permission(struct inode *inode, int mask)
2171{ 2171{
2172 int rv = generic_permission(inode, mask, flags, NULL); 2172 int rv = generic_permission(inode, mask);
2173 if (rv == 0) 2173 if (rv == 0)
2174 return 0; 2174 return 0;
2175 if (task_pid(current) == proc_pid(inode)) 2175 if (task_pid(current) == proc_pid(inode))
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d167de365a8d..1a77dbef226f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -294,7 +294,7 @@ out:
294 return ret; 294 return ret;
295} 295}
296 296
297static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags) 297static int proc_sys_permission(struct inode *inode, int mask)
298{ 298{
299 /* 299 /*
300 * sysctl entries that are not writeable, 300 * sysctl entries that are not writeable,
@@ -316,7 +316,7 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
316 if (!table) /* global root - r-xr-xr-x */ 316 if (!table) /* global root - r-xr-xr-x */
317 error = mask & MAY_WRITE ? -EACCES : 0; 317 error = mask & MAY_WRITE ? -EACCES : 0;
318 else /* Use the permissions on the sysctl table entry */ 318 else /* Use the permissions on the sysctl table entry */
319 error = sysctl_perm(head->root, table, mask); 319 error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK);
320 320
321 sysctl_head_finish(head); 321 sysctl_head_finish(head);
322 return error; 322 return error;
diff --git a/fs/read_write.c b/fs/read_write.c
index 5520f8ad5504..5907b49e4d7e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -64,6 +64,23 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
64 return file->f_pos; 64 return file->f_pos;
65 offset += file->f_pos; 65 offset += file->f_pos;
66 break; 66 break;
67 case SEEK_DATA:
68 /*
69 * In the generic case the entire file is data, so as long as
70 * offset isn't at the end of the file then the offset is data.
71 */
72 if (offset >= inode->i_size)
73 return -ENXIO;
74 break;
75 case SEEK_HOLE:
76 /*
77 * There is a virtual hole at the end of the file, so as long as
78 * offset isn't i_size or larger, return i_size.
79 */
80 if (offset >= inode->i_size)
81 return -ENXIO;
82 offset = inode->i_size;
83 break;
67 } 84 }
68 85
69 if (offset < 0 && !unsigned_offsets(file)) 86 if (offset < 0 && !unsigned_offsets(file))
@@ -128,12 +145,13 @@ EXPORT_SYMBOL(no_llseek);
128 145
129loff_t default_llseek(struct file *file, loff_t offset, int origin) 146loff_t default_llseek(struct file *file, loff_t offset, int origin)
130{ 147{
148 struct inode *inode = file->f_path.dentry->d_inode;
131 loff_t retval; 149 loff_t retval;
132 150
133 mutex_lock(&file->f_dentry->d_inode->i_mutex); 151 mutex_lock(&inode->i_mutex);
134 switch (origin) { 152 switch (origin) {
135 case SEEK_END: 153 case SEEK_END:
136 offset += i_size_read(file->f_path.dentry->d_inode); 154 offset += i_size_read(inode);
137 break; 155 break;
138 case SEEK_CUR: 156 case SEEK_CUR:
139 if (offset == 0) { 157 if (offset == 0) {
@@ -141,6 +159,26 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
141 goto out; 159 goto out;
142 } 160 }
143 offset += file->f_pos; 161 offset += file->f_pos;
162 break;
163 case SEEK_DATA:
164 /*
165 * In the generic case the entire file is data, so as
166 * long as offset isn't at the end of the file then the
167 * offset is data.
168 */
169 if (offset >= inode->i_size)
170 return -ENXIO;
171 break;
172 case SEEK_HOLE:
173 /*
174 * There is a virtual hole at the end of the file, so
175 * as long as offset isn't i_size or larger, return
176 * i_size.
177 */
178 if (offset >= inode->i_size)
179 return -ENXIO;
180 offset = inode->i_size;
181 break;
144 } 182 }
145 retval = -EINVAL; 183 retval = -EINVAL;
146 if (offset >= 0 || unsigned_offsets(file)) { 184 if (offset >= 0 || unsigned_offsets(file)) {
@@ -151,7 +189,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
151 retval = offset; 189 retval = offset;
152 } 190 }
153out: 191out:
154 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 192 mutex_unlock(&inode->i_mutex);
155 return retval; 193 return retval;
156} 194}
157EXPORT_SYMBOL(default_llseek); 195EXPORT_SYMBOL(default_llseek);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 198dabf1b2bb..133e9355dc6f 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -14,7 +14,8 @@
14extern const struct reiserfs_key MIN_KEY; 14extern const struct reiserfs_key MIN_KEY;
15 15
16static int reiserfs_readdir(struct file *, void *, filldir_t); 16static int reiserfs_readdir(struct file *, void *, filldir_t);
17static int reiserfs_dir_fsync(struct file *filp, int datasync); 17static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
18 int datasync);
18 19
19const struct file_operations reiserfs_dir_operations = { 20const struct file_operations reiserfs_dir_operations = {
20 .llseek = generic_file_llseek, 21 .llseek = generic_file_llseek,
@@ -27,13 +28,21 @@ const struct file_operations reiserfs_dir_operations = {
27#endif 28#endif
28}; 29};
29 30
30static int reiserfs_dir_fsync(struct file *filp, int datasync) 31static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
32 int datasync)
31{ 33{
32 struct inode *inode = filp->f_mapping->host; 34 struct inode *inode = filp->f_mapping->host;
33 int err; 35 int err;
36
37 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
38 if (err)
39 return err;
40
41 mutex_lock(&inode->i_mutex);
34 reiserfs_write_lock(inode->i_sb); 42 reiserfs_write_lock(inode->i_sb);
35 err = reiserfs_commit_for_inode(inode); 43 err = reiserfs_commit_for_inode(inode);
36 reiserfs_write_unlock(inode->i_sb); 44 reiserfs_write_unlock(inode->i_sb);
45 mutex_unlock(&inode->i_mutex);
37 if (err < 0) 46 if (err < 0)
38 return err; 47 return err;
39 return 0; 48 return 0;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 91f080cc76c8..c7156dc39ce7 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -140,12 +140,18 @@ static void reiserfs_vfs_truncate_file(struct inode *inode)
140 * be removed... 140 * be removed...
141 */ 141 */
142 142
143static int reiserfs_sync_file(struct file *filp, int datasync) 143static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
144 int datasync)
144{ 145{
145 struct inode *inode = filp->f_mapping->host; 146 struct inode *inode = filp->f_mapping->host;
146 int err; 147 int err;
147 int barrier_done; 148 int barrier_done;
148 149
150 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
151 if (err)
152 return err;
153
154 mutex_lock(&inode->i_mutex);
149 BUG_ON(!S_ISREG(inode->i_mode)); 155 BUG_ON(!S_ISREG(inode->i_mode));
150 err = sync_mapping_buffers(inode->i_mapping); 156 err = sync_mapping_buffers(inode->i_mapping);
151 reiserfs_write_lock(inode->i_sb); 157 reiserfs_write_lock(inode->i_sb);
@@ -153,6 +159,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync)
153 reiserfs_write_unlock(inode->i_sb); 159 reiserfs_write_unlock(inode->i_sb);
154 if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) 160 if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
155 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 161 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
162 mutex_unlock(&inode->i_mutex);
156 if (barrier_done < 0) 163 if (barrier_done < 0)
157 return barrier_done; 164 return barrier_done;
158 return (err < 0) ? -EIO : 0; 165 return (err < 0) ? -EIO : 0;
@@ -312,4 +319,5 @@ const struct inode_operations reiserfs_file_inode_operations = {
312 .listxattr = reiserfs_listxattr, 319 .listxattr = reiserfs_listxattr,
313 .removexattr = reiserfs_removexattr, 320 .removexattr = reiserfs_removexattr,
314 .permission = reiserfs_permission, 321 .permission = reiserfs_permission,
322 .check_acl = reiserfs_check_acl,
315}; 323};
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 4fd5bb33dbb5..2922b90ceac1 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3068,9 +3068,8 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3068 struct inode *inode = file->f_mapping->host; 3068 struct inode *inode = file->f_mapping->host;
3069 ssize_t ret; 3069 ssize_t ret;
3070 3070
3071 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3071 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
3072 offset, nr_segs, 3072 reiserfs_get_blocks_direct_io);
3073 reiserfs_get_blocks_direct_io, NULL);
3074 3073
3075 /* 3074 /*
3076 * In case of error extending write may have instantiated a few 3075 * In case of error extending write may have instantiated a few
@@ -3114,6 +3113,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3114 error = -EFBIG; 3113 error = -EFBIG;
3115 goto out; 3114 goto out;
3116 } 3115 }
3116
3117 inode_dio_wait(inode);
3118
3117 /* fill in hole pointers in the expanding truncate case. */ 3119 /* fill in hole pointers in the expanding truncate case. */
3118 if (attr->ia_size > inode->i_size) { 3120 if (attr->ia_size > inode->i_size) {
3119 error = generic_cont_expand_simple(inode, attr->ia_size); 3121 error = generic_cont_expand_simple(inode, attr->ia_size);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 118662690cdf..551f1b79dbc4 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1529,6 +1529,7 @@ const struct inode_operations reiserfs_dir_inode_operations = {
1529 .listxattr = reiserfs_listxattr, 1529 .listxattr = reiserfs_listxattr,
1530 .removexattr = reiserfs_removexattr, 1530 .removexattr = reiserfs_removexattr,
1531 .permission = reiserfs_permission, 1531 .permission = reiserfs_permission,
1532 .check_acl = reiserfs_check_acl,
1532}; 1533};
1533 1534
1534/* 1535/*
@@ -1545,6 +1546,7 @@ const struct inode_operations reiserfs_symlink_inode_operations = {
1545 .listxattr = reiserfs_listxattr, 1546 .listxattr = reiserfs_listxattr,
1546 .removexattr = reiserfs_removexattr, 1547 .removexattr = reiserfs_removexattr,
1547 .permission = reiserfs_permission, 1548 .permission = reiserfs_permission,
1549 .check_acl = reiserfs_check_acl,
1548 1550
1549}; 1551};
1550 1552
@@ -1558,5 +1560,5 @@ const struct inode_operations reiserfs_special_inode_operations = {
1558 .listxattr = reiserfs_listxattr, 1560 .listxattr = reiserfs_listxattr,
1559 .removexattr = reiserfs_removexattr, 1561 .removexattr = reiserfs_removexattr,
1560 .permission = reiserfs_permission, 1562 .permission = reiserfs_permission,
1561 1563 .check_acl = reiserfs_check_acl,
1562}; 1564};
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index aa91089162cb..14363b96b6af 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1643,6 +1643,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1643 /* Set default values for options: non-aggressive tails, RO on errors */ 1643 /* Set default values for options: non-aggressive tails, RO on errors */
1644 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); 1644 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
1645 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); 1645 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
1646 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH);
1646 /* no preallocation minimum, be smart in 1647 /* no preallocation minimum, be smart in
1647 reiserfs_file_write instead */ 1648 reiserfs_file_write instead */
1648 REISERFS_SB(s)->s_alloc_options.preallocmin = 0; 1649 REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d78089690965..6938d8c68d6e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -555,11 +555,10 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
555 555
556 reiserfs_write_unlock(inode->i_sb); 556 reiserfs_write_unlock(inode->i_sb);
557 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 557 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
558 down_write(&dentry->d_inode->i_alloc_sem); 558 inode_dio_wait(dentry->d_inode);
559 reiserfs_write_lock(inode->i_sb); 559 reiserfs_write_lock(inode->i_sb);
560 560
561 err = reiserfs_setattr(dentry, &newattrs); 561 err = reiserfs_setattr(dentry, &newattrs);
562 up_write(&dentry->d_inode->i_alloc_sem);
563 mutex_unlock(&dentry->d_inode->i_mutex); 562 mutex_unlock(&dentry->d_inode->i_mutex);
564 } else 563 } else
565 update_ctime(inode); 564 update_ctime(inode);
@@ -868,12 +867,18 @@ out:
868 return err; 867 return err;
869} 868}
870 869
871static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags) 870int reiserfs_check_acl(struct inode *inode, int mask)
872{ 871{
873 struct posix_acl *acl; 872 struct posix_acl *acl;
874 int error = -EAGAIN; /* do regular unix permission checks by default */ 873 int error = -EAGAIN; /* do regular unix permission checks by default */
875 874
876 if (flags & IPERM_FLAG_RCU) 875 /*
876 * Stat data v1 doesn't support ACLs.
877 */
878 if (get_inode_sd_version(inode) == STAT_DATA_V1)
879 return -EAGAIN;
880
881 if (mask & MAY_NOT_BLOCK)
877 return -ECHILD; 882 return -ECHILD;
878 883
879 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 884 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
@@ -952,7 +957,7 @@ static int xattr_mount_check(struct super_block *s)
952 return 0; 957 return 0;
953} 958}
954 959
955int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) 960int reiserfs_permission(struct inode *inode, int mask)
956{ 961{
957 /* 962 /*
958 * We don't do permission checks on the internal objects. 963 * We don't do permission checks on the internal objects.
@@ -961,15 +966,7 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
961 if (IS_PRIVATE(inode)) 966 if (IS_PRIVATE(inode))
962 return 0; 967 return 0;
963 968
964#ifdef CONFIG_REISERFS_FS_XATTR 969 return generic_permission(inode, mask);
965 /*
966 * Stat data v1 doesn't support ACLs.
967 */
968 if (get_inode_sd_version(inode) != STAT_DATA_V1)
969 return generic_permission(inode, mask, flags,
970 reiserfs_check_acl);
971#endif
972 return generic_permission(inode, mask, flags, NULL);
973} 970}
974 971
975static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 972static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 4bc63ac64bc0..0682b38d7e31 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -220,11 +220,6 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
220 blk, off, ino_num); 220 blk, off, ino_num);
221 221
222 inode = squashfs_iget(dir->i_sb, ino, ino_num); 222 inode = squashfs_iget(dir->i_sb, ino, ino_num);
223 if (IS_ERR(inode)) {
224 err = PTR_ERR(inode);
225 goto failed;
226 }
227
228 goto exit_lookup; 223 goto exit_lookup;
229 } 224 }
230 } 225 }
@@ -232,10 +227,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
232 227
233exit_lookup: 228exit_lookup:
234 kfree(dire); 229 kfree(dire);
235 if (inode) 230 return d_splice_alias(inode, dentry);
236 return d_splice_alias(inode, dentry);
237 d_add(dentry, inode);
238 return ERR_PTR(0);
239 231
240data_error: 232data_error:
241 err = -EIO; 233 err = -EIO;
diff --git a/fs/super.c b/fs/super.c
index ab3d672db0de..7943f04cb3a9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,6 +38,69 @@
38LIST_HEAD(super_blocks); 38LIST_HEAD(super_blocks);
39DEFINE_SPINLOCK(sb_lock); 39DEFINE_SPINLOCK(sb_lock);
40 40
41/*
42 * One thing we have to be careful of with a per-sb shrinker is that we don't
43 * drop the last active reference to the superblock from within the shrinker.
44 * If that happens we could trigger unregistering the shrinker from within the
45 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
46 * take a passive reference to the superblock to avoid this from occurring.
47 */
48static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
49{
50 struct super_block *sb;
51 int fs_objects = 0;
52 int total_objects;
53
54 sb = container_of(shrink, struct super_block, s_shrink);
55
56 /*
57 * Deadlock avoidance. We may hold various FS locks, and we don't want
58 * to recurse into the FS that called us in clear_inode() and friends..
59 */
60 if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
61 return -1;
62
63 if (!grab_super_passive(sb))
64 return -1;
65
66 if (sb->s_op && sb->s_op->nr_cached_objects)
67 fs_objects = sb->s_op->nr_cached_objects(sb);
68
69 total_objects = sb->s_nr_dentry_unused +
70 sb->s_nr_inodes_unused + fs_objects + 1;
71
72 if (sc->nr_to_scan) {
73 int dentries;
74 int inodes;
75
76 /* proportion the scan between the caches */
77 dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
78 total_objects;
79 inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
80 total_objects;
81 if (fs_objects)
82 fs_objects = (sc->nr_to_scan * fs_objects) /
83 total_objects;
84 /*
85 * prune the dcache first as the icache is pinned by it, then
86 * prune the icache, followed by the filesystem specific caches
87 */
88 prune_dcache_sb(sb, dentries);
89 prune_icache_sb(sb, inodes);
90
91 if (fs_objects && sb->s_op->free_cached_objects) {
92 sb->s_op->free_cached_objects(sb, fs_objects);
93 fs_objects = sb->s_op->nr_cached_objects(sb);
94 }
95 total_objects = sb->s_nr_dentry_unused +
96 sb->s_nr_inodes_unused + fs_objects;
97 }
98
99 total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure;
100 drop_super(sb);
101 return total_objects;
102}
103
41/** 104/**
42 * alloc_super - create new superblock 105 * alloc_super - create new superblock
43 * @type: filesystem type superblock should belong to 106 * @type: filesystem type superblock should belong to
@@ -77,6 +140,8 @@ static struct super_block *alloc_super(struct file_system_type *type)
77 INIT_HLIST_BL_HEAD(&s->s_anon); 140 INIT_HLIST_BL_HEAD(&s->s_anon);
78 INIT_LIST_HEAD(&s->s_inodes); 141 INIT_LIST_HEAD(&s->s_inodes);
79 INIT_LIST_HEAD(&s->s_dentry_lru); 142 INIT_LIST_HEAD(&s->s_dentry_lru);
143 INIT_LIST_HEAD(&s->s_inode_lru);
144 spin_lock_init(&s->s_inode_lru_lock);
80 init_rwsem(&s->s_umount); 145 init_rwsem(&s->s_umount);
81 mutex_init(&s->s_lock); 146 mutex_init(&s->s_lock);
82 lockdep_set_class(&s->s_umount, &type->s_umount_key); 147 lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -114,6 +179,10 @@ static struct super_block *alloc_super(struct file_system_type *type)
114 s->s_op = &default_op; 179 s->s_op = &default_op;
115 s->s_time_gran = 1000000000; 180 s->s_time_gran = 1000000000;
116 s->cleancache_poolid = -1; 181 s->cleancache_poolid = -1;
182
183 s->s_shrink.seeks = DEFAULT_SEEKS;
184 s->s_shrink.shrink = prune_super;
185 s->s_shrink.batch = 1024;
117 } 186 }
118out: 187out:
119 return s; 188 return s;
@@ -181,6 +250,10 @@ void deactivate_locked_super(struct super_block *s)
181 if (atomic_dec_and_test(&s->s_active)) { 250 if (atomic_dec_and_test(&s->s_active)) {
182 cleancache_flush_fs(s); 251 cleancache_flush_fs(s);
183 fs->kill_sb(s); 252 fs->kill_sb(s);
253
254 /* caches are now gone, we can safely kill the shrinker now */
255 unregister_shrinker(&s->s_shrink);
256
184 /* 257 /*
185 * We need to call rcu_barrier so all the delayed rcu free 258 * We need to call rcu_barrier so all the delayed rcu free
186 * inodes are flushed before we release the fs module. 259 * inodes are flushed before we release the fs module.
@@ -241,6 +314,39 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
241} 314}
242 315
243/* 316/*
317 * grab_super_passive - acquire a passive reference
318 * @s: reference we are trying to grab
319 *
320 * Tries to acquire a passive reference. This is used in places where we
321 * cannot take an active reference but we need to ensure that the
322 * superblock does not go away while we are working on it. It returns
323 * false if a reference was not gained, and returns true with the s_umount
324 * lock held in read mode if a reference is gained. On successful return,
325 * the caller must drop the s_umount lock and the passive reference when
326 * done.
327 */
328bool grab_super_passive(struct super_block *sb)
329{
330 spin_lock(&sb_lock);
331 if (list_empty(&sb->s_instances)) {
332 spin_unlock(&sb_lock);
333 return false;
334 }
335
336 sb->s_count++;
337 spin_unlock(&sb_lock);
338
339 if (down_read_trylock(&sb->s_umount)) {
340 if (sb->s_root)
341 return true;
342 up_read(&sb->s_umount);
343 }
344
345 put_super(sb);
346 return false;
347}
348
349/*
244 * Superblock locking. We really ought to get rid of these two. 350 * Superblock locking. We really ought to get rid of these two.
245 */ 351 */
246void lock_super(struct super_block * sb) 352void lock_super(struct super_block * sb)
@@ -276,7 +382,6 @@ void generic_shutdown_super(struct super_block *sb)
276{ 382{
277 const struct super_operations *sop = sb->s_op; 383 const struct super_operations *sop = sb->s_op;
278 384
279
280 if (sb->s_root) { 385 if (sb->s_root) {
281 shrink_dcache_for_umount(sb); 386 shrink_dcache_for_umount(sb);
282 sync_filesystem(sb); 387 sync_filesystem(sb);
@@ -364,6 +469,7 @@ retry:
364 list_add(&s->s_instances, &type->fs_supers); 469 list_add(&s->s_instances, &type->fs_supers);
365 spin_unlock(&sb_lock); 470 spin_unlock(&sb_lock);
366 get_filesystem(type); 471 get_filesystem(type);
472 register_shrinker(&s->s_shrink);
367 return s; 473 return s;
368} 474}
369 475
@@ -452,6 +558,42 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
452} 558}
453 559
454/** 560/**
561 * iterate_supers_type - call function for superblocks of given type
562 * @type: fs type
563 * @f: function to call
564 * @arg: argument to pass to it
565 *
566 * Scans the superblock list and calls given function, passing it
567 * locked superblock and given argument.
568 */
569void iterate_supers_type(struct file_system_type *type,
570 void (*f)(struct super_block *, void *), void *arg)
571{
572 struct super_block *sb, *p = NULL;
573
574 spin_lock(&sb_lock);
575 list_for_each_entry(sb, &type->fs_supers, s_instances) {
576 sb->s_count++;
577 spin_unlock(&sb_lock);
578
579 down_read(&sb->s_umount);
580 if (sb->s_root)
581 f(sb, arg);
582 up_read(&sb->s_umount);
583
584 spin_lock(&sb_lock);
585 if (p)
586 __put_super(p);
587 p = sb;
588 }
589 if (p)
590 __put_super(p);
591 spin_unlock(&sb_lock);
592}
593
594EXPORT_SYMBOL(iterate_supers_type);
595
596/**
455 * get_super - get the superblock of a device 597 * get_super - get the superblock of a device
456 * @bdev: device to get the superblock for 598 * @bdev: device to get the superblock for
457 * 599 *
@@ -657,7 +799,7 @@ static DEFINE_IDA(unnamed_dev_ida);
657static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 799static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
658static int unnamed_dev_start = 0; /* don't bother trying below it */ 800static int unnamed_dev_start = 0; /* don't bother trying below it */
659 801
660int set_anon_super(struct super_block *s, void *data) 802int get_anon_bdev(dev_t *p)
661{ 803{
662 int dev; 804 int dev;
663 int error; 805 int error;
@@ -684,24 +826,38 @@ int set_anon_super(struct super_block *s, void *data)
684 spin_unlock(&unnamed_dev_lock); 826 spin_unlock(&unnamed_dev_lock);
685 return -EMFILE; 827 return -EMFILE;
686 } 828 }
687 s->s_dev = MKDEV(0, dev & MINORMASK); 829 *p = MKDEV(0, dev & MINORMASK);
688 s->s_bdi = &noop_backing_dev_info;
689 return 0; 830 return 0;
690} 831}
832EXPORT_SYMBOL(get_anon_bdev);
691 833
692EXPORT_SYMBOL(set_anon_super); 834void free_anon_bdev(dev_t dev)
693
694void kill_anon_super(struct super_block *sb)
695{ 835{
696 int slot = MINOR(sb->s_dev); 836 int slot = MINOR(dev);
697
698 generic_shutdown_super(sb);
699 spin_lock(&unnamed_dev_lock); 837 spin_lock(&unnamed_dev_lock);
700 ida_remove(&unnamed_dev_ida, slot); 838 ida_remove(&unnamed_dev_ida, slot);
701 if (slot < unnamed_dev_start) 839 if (slot < unnamed_dev_start)
702 unnamed_dev_start = slot; 840 unnamed_dev_start = slot;
703 spin_unlock(&unnamed_dev_lock); 841 spin_unlock(&unnamed_dev_lock);
704} 842}
843EXPORT_SYMBOL(free_anon_bdev);
844
845int set_anon_super(struct super_block *s, void *data)
846{
847 int error = get_anon_bdev(&s->s_dev);
848 if (!error)
849 s->s_bdi = &noop_backing_dev_info;
850 return error;
851}
852
853EXPORT_SYMBOL(set_anon_super);
854
855void kill_anon_super(struct super_block *sb)
856{
857 dev_t dev = sb->s_dev;
858 generic_shutdown_super(sb);
859 free_anon_bdev(dev);
860}
705 861
706EXPORT_SYMBOL(kill_anon_super); 862EXPORT_SYMBOL(kill_anon_super);
707 863
diff --git a/fs/sync.c b/fs/sync.c
index c38ec163da6c..c98a7477edfd 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -165,28 +165,9 @@ SYSCALL_DEFINE1(syncfs, int, fd)
165 */ 165 */
166int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) 166int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
167{ 167{
168 struct address_space *mapping = file->f_mapping; 168 if (!file->f_op || !file->f_op->fsync)
169 int err, ret; 169 return -EINVAL;
170 170 return file->f_op->fsync(file, start, end, datasync);
171 if (!file->f_op || !file->f_op->fsync) {
172 ret = -EINVAL;
173 goto out;
174 }
175
176 ret = filemap_write_and_wait_range(mapping, start, end);
177
178 /*
179 * We need to protect against concurrent writers, which could cause
180 * livelocks in fsync_buffers_list().
181 */
182 mutex_lock(&mapping->host->i_mutex);
183 err = file->f_op->fsync(file, datasync);
184 if (!ret)
185 ret = err;
186 mutex_unlock(&mapping->host->i_mutex);
187
188out:
189 return ret;
190} 171}
191EXPORT_SYMBOL(vfs_fsync_range); 172EXPORT_SYMBOL(vfs_fsync_range);
192 173
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0a12eb89cd32..e3f091a81c72 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -349,11 +349,11 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
349 return -ENOENT; 349 return -ENOENT;
350} 350}
351 351
352int sysfs_permission(struct inode *inode, int mask, unsigned int flags) 352int sysfs_permission(struct inode *inode, int mask)
353{ 353{
354 struct sysfs_dirent *sd; 354 struct sysfs_dirent *sd;
355 355
356 if (flags & IPERM_FLAG_RCU) 356 if (mask & MAY_NOT_BLOCK)
357 return -ECHILD; 357 return -ECHILD;
358 358
359 sd = inode->i_private; 359 sd = inode->i_private;
@@ -362,5 +362,5 @@ int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
362 sysfs_refresh_inode(sd, inode); 362 sysfs_refresh_inode(sd, inode);
363 mutex_unlock(&sysfs_mutex); 363 mutex_unlock(&sysfs_mutex);
364 364
365 return generic_permission(inode, mask, flags, NULL); 365 return generic_permission(inode, mask);
366} 366}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 2ed2404f3113..845ab3ad229d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -201,7 +201,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
201struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); 201struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
202void sysfs_evict_inode(struct inode *inode); 202void sysfs_evict_inode(struct inode *inode);
203int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); 203int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
204int sysfs_permission(struct inode *inode, int mask, unsigned int flags); 204int sysfs_permission(struct inode *inode, int mask);
205int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 205int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
206int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 206int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
207int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 207int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 7cf738a4544d..f9c234bf33d3 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1304,7 +1304,7 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
1304 return NULL; 1304 return NULL;
1305} 1305}
1306 1306
1307int ubifs_fsync(struct file *file, int datasync) 1307int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1308{ 1308{
1309 struct inode *inode = file->f_mapping->host; 1309 struct inode *inode = file->f_mapping->host;
1310 struct ubifs_info *c = inode->i_sb->s_fs_info; 1310 struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1319,14 +1319,16 @@ int ubifs_fsync(struct file *file, int datasync)
1319 */ 1319 */
1320 return 0; 1320 return 0;
1321 1321
1322 /* 1322 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
1323 * VFS has already synchronized dirty pages for this inode. Synchronize 1323 if (err)
1324 * the inode unless this is a 'datasync()' call. 1324 return err;
1325 */ 1325 mutex_lock(&inode->i_mutex);
1326
1327 /* Synchronize the inode unless this is a 'datasync()' call. */
1326 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { 1328 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
1327 err = inode->i_sb->s_op->write_inode(inode, NULL); 1329 err = inode->i_sb->s_op->write_inode(inode, NULL);
1328 if (err) 1330 if (err)
1329 return err; 1331 goto out;
1330 } 1332 }
1331 1333
1332 /* 1334 /*
@@ -1334,10 +1336,9 @@ int ubifs_fsync(struct file *file, int datasync)
1334 * them. 1336 * them.
1335 */ 1337 */
1336 err = ubifs_sync_wbufs_by_inode(c, inode); 1338 err = ubifs_sync_wbufs_by_inode(c, inode);
1337 if (err) 1339out:
1338 return err; 1340 mutex_unlock(&inode->i_mutex);
1339 1341 return err;
1340 return 0;
1341} 1342}
1342 1343
1343/** 1344/**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 702b79258e30..27f22551f805 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1729,7 +1729,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
1729int ubifs_calc_dark(const struct ubifs_info *c, int spc); 1729int ubifs_calc_dark(const struct ubifs_info *c, int spc);
1730 1730
1731/* file.c */ 1731/* file.c */
1732int ubifs_fsync(struct file *file, int datasync); 1732int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
1733int ubifs_setattr(struct dentry *dentry, struct iattr *attr); 1733int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
1734 1734
1735/* dir.c */ 1735/* dir.c */
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2a346bb1d9f5..d8ffa7cc661d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -150,7 +150,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
150 long old_block, new_block; 150 long old_block, new_block;
151 int result = -EINVAL; 151 int result = -EINVAL;
152 152
153 if (file_permission(filp, MAY_READ) != 0) { 153 if (inode_permission(inode, MAY_READ) != 0) {
154 udf_debug("no permission to access inode %lu\n", inode->i_ino); 154 udf_debug("no permission to access inode %lu\n", inode->i_ino);
155 result = -EPERM; 155 result = -EPERM;
156 goto out; 156 goto out;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index b57aab9a1184..639d49162241 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -59,8 +59,6 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
59 if (ino) 59 if (ino)
60 inode = ufs_iget(dir->i_sb, ino); 60 inode = ufs_iget(dir->i_sb, ino);
61 unlock_ufs(dir->i_sb); 61 unlock_ufs(dir->i_sb);
62 if (IS_ERR(inode))
63 return ERR_CAST(inode);
64 return d_splice_alias(inode, dentry); 62 return d_splice_alias(inode, dentry);
65} 63}
66 64
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 115ac6919533..cac48fe22ad5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,7 +219,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219} 219}
220 220
221int 221int
222xfs_check_acl(struct inode *inode, int mask, unsigned int flags) 222xfs_check_acl(struct inode *inode, int mask)
223{ 223{
224 struct xfs_inode *ip; 224 struct xfs_inode *ip;
225 struct posix_acl *acl; 225 struct posix_acl *acl;
@@ -235,7 +235,7 @@ xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
235 if (!XFS_IFORK_Q(ip)) 235 if (!XFS_IFORK_Q(ip))
236 return -EAGAIN; 236 return -EAGAIN;
237 237
238 if (flags & IPERM_FLAG_RCU) { 238 if (mask & MAY_NOT_BLOCK) {
239 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 239 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
240 return -ECHILD; 240 return -ECHILD;
241 return -EAGAIN; 241 return -EAGAIN;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 26384fe3f26d..63e971e2b837 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1329,6 +1329,9 @@ xfs_end_io_direct_write(
1329 } else { 1329 } else {
1330 xfs_finish_ioend_sync(ioend); 1330 xfs_finish_ioend_sync(ioend);
1331 } 1331 }
1332
1333 /* XXX: probably should move into the real I/O completion handler */
1334 inode_dio_done(ioend->io_inode);
1332} 1335}
1333 1336
1334STATIC ssize_t 1337STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 8073f61efb8e..cca00f49e092 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -127,6 +127,8 @@ xfs_iozero(
127STATIC int 127STATIC int
128xfs_file_fsync( 128xfs_file_fsync(
129 struct file *file, 129 struct file *file,
130 loff_t start,
131 loff_t end,
130 int datasync) 132 int datasync)
131{ 133{
132 struct inode *inode = file->f_mapping->host; 134 struct inode *inode = file->f_mapping->host;
@@ -138,6 +140,10 @@ xfs_file_fsync(
138 140
139 trace_xfs_file_fsync(ip); 141 trace_xfs_file_fsync(ip);
140 142
143 error = filemap_write_and_wait_range(inode->i_mapping, start, end);
144 if (error)
145 return error;
146
141 if (XFS_FORCED_SHUTDOWN(mp)) 147 if (XFS_FORCED_SHUTDOWN(mp))
142 return -XFS_ERROR(EIO); 148 return -XFS_ERROR(EIO);
143 149
@@ -875,18 +881,11 @@ xfs_file_aio_write(
875 /* Handle various SYNC-type writes */ 881 /* Handle various SYNC-type writes */
876 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { 882 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
877 loff_t end = pos + ret - 1; 883 loff_t end = pos + ret - 1;
878 int error, error2;
879 884
880 xfs_rw_iunlock(ip, iolock); 885 xfs_rw_iunlock(ip, iolock);
881 error = filemap_write_and_wait_range(mapping, pos, end); 886 ret = -xfs_file_fsync(file, pos, end,
887 (file->f_flags & __O_SYNC) ? 0 : 1);
882 xfs_rw_ilock(ip, iolock); 888 xfs_rw_ilock(ip, iolock);
883
884 error2 = -xfs_file_fsync(file,
885 (file->f_flags & __O_SYNC) ? 0 : 1);
886 if (error)
887 ret = error;
888 else if (error2)
889 ret = error2;
890 } 889 }
891 890
892out_unlock: 891out_unlock:
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 25fd2cd6c8b0..9a72dda58bd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1024,11 +1024,6 @@ xfs_fs_put_super(
1024{ 1024{
1025 struct xfs_mount *mp = XFS_M(sb); 1025 struct xfs_mount *mp = XFS_M(sb);
1026 1026
1027 /*
1028 * Unregister the memory shrinker before we tear down the mount
1029 * structure so we don't have memory reclaim racing with us here.
1030 */
1031 xfs_inode_shrinker_unregister(mp);
1032 xfs_syncd_stop(mp); 1027 xfs_syncd_stop(mp);
1033 1028
1034 /* 1029 /*
@@ -1411,8 +1406,6 @@ xfs_fs_fill_super(
1411 sb->s_time_gran = 1; 1406 sb->s_time_gran = 1;
1412 set_posix_acl_flag(sb); 1407 set_posix_acl_flag(sb);
1413 1408
1414 xfs_inode_shrinker_register(mp);
1415
1416 error = xfs_mountfs(mp); 1409 error = xfs_mountfs(mp);
1417 if (error) 1410 if (error)
1418 goto out_filestream_unmount; 1411 goto out_filestream_unmount;
@@ -1439,7 +1432,6 @@ xfs_fs_fill_super(
1439 return 0; 1432 return 0;
1440 1433
1441 out_filestream_unmount: 1434 out_filestream_unmount:
1442 xfs_inode_shrinker_unregister(mp);
1443 xfs_filestream_unmount(mp); 1435 xfs_filestream_unmount(mp);
1444 out_free_sb: 1436 out_free_sb:
1445 xfs_freesb(mp); 1437 xfs_freesb(mp);
@@ -1458,8 +1450,6 @@ xfs_fs_fill_super(
1458 out_syncd_stop: 1450 out_syncd_stop:
1459 xfs_syncd_stop(mp); 1451 xfs_syncd_stop(mp);
1460 out_unmount: 1452 out_unmount:
1461 xfs_inode_shrinker_unregister(mp);
1462
1463 /* 1453 /*
1464 * Blow away any referenced inode in the filestreams cache. 1454 * Blow away any referenced inode in the filestreams cache.
1465 * This can and will cause log traffic as inodes go inactive 1455 * This can and will cause log traffic as inodes go inactive
@@ -1483,6 +1473,21 @@ xfs_fs_mount(
1483 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); 1473 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1484} 1474}
1485 1475
1476static int
1477xfs_fs_nr_cached_objects(
1478 struct super_block *sb)
1479{
1480 return xfs_reclaim_inodes_count(XFS_M(sb));
1481}
1482
1483static void
1484xfs_fs_free_cached_objects(
1485 struct super_block *sb,
1486 int nr_to_scan)
1487{
1488 xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
1489}
1490
1486static const struct super_operations xfs_super_operations = { 1491static const struct super_operations xfs_super_operations = {
1487 .alloc_inode = xfs_fs_alloc_inode, 1492 .alloc_inode = xfs_fs_alloc_inode,
1488 .destroy_inode = xfs_fs_destroy_inode, 1493 .destroy_inode = xfs_fs_destroy_inode,
@@ -1496,6 +1501,8 @@ static const struct super_operations xfs_super_operations = {
1496 .statfs = xfs_fs_statfs, 1501 .statfs = xfs_fs_statfs,
1497 .remount_fs = xfs_fs_remount, 1502 .remount_fs = xfs_fs_remount,
1498 .show_options = xfs_fs_show_options, 1503 .show_options = xfs_fs_show_options,
1504 .nr_cached_objects = xfs_fs_nr_cached_objects,
1505 .free_cached_objects = xfs_fs_free_cached_objects,
1499}; 1506};
1500 1507
1501static struct file_system_type xfs_fs_type = { 1508static struct file_system_type xfs_fs_type = {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 5cc158e52d4c..e4c938afb910 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -179,6 +179,8 @@ restart:
179 if (error == EFSCORRUPTED) 179 if (error == EFSCORRUPTED)
180 break; 180 break;
181 181
182 cond_resched();
183
182 } while (nr_found && !done); 184 } while (nr_found && !done);
183 185
184 if (skipped) { 186 if (skipped) {
@@ -984,6 +986,8 @@ restart:
984 986
985 *nr_to_scan -= XFS_LOOKUP_BATCH; 987 *nr_to_scan -= XFS_LOOKUP_BATCH;
986 988
989 cond_resched();
990
987 } while (nr_found && !done && *nr_to_scan > 0); 991 } while (nr_found && !done && *nr_to_scan > 0);
988 992
989 if (trylock && !done) 993 if (trylock && !done)
@@ -1001,7 +1005,7 @@ restart:
1001 * ensure that when we get more reclaimers than AGs we block rather 1005 * ensure that when we get more reclaimers than AGs we block rather
1002 * than spin trying to execute reclaim. 1006 * than spin trying to execute reclaim.
1003 */ 1007 */
1004 if (trylock && skipped && *nr_to_scan > 0) { 1008 if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
1005 trylock = 0; 1009 trylock = 0;
1006 goto restart; 1010 goto restart;
1007 } 1011 }
@@ -1019,44 +1023,38 @@ xfs_reclaim_inodes(
1019} 1023}
1020 1024
1021/* 1025/*
1022 * Inode cache shrinker. 1026 * Scan a certain number of inodes for reclaim.
1023 * 1027 *
1024 * When called we make sure that there is a background (fast) inode reclaim in 1028 * When called we make sure that there is a background (fast) inode reclaim in
1025 * progress, while we will throttle the speed of reclaim via doiing synchronous 1029 * progress, while we will throttle the speed of reclaim via doing synchronous
1026 * reclaim of inodes. That means if we come across dirty inodes, we wait for 1030 * reclaim of inodes. That means if we come across dirty inodes, we wait for
1027 * them to be cleaned, which we hope will not be very long due to the 1031 * them to be cleaned, which we hope will not be very long due to the
1028 * background walker having already kicked the IO off on those dirty inodes. 1032 * background walker having already kicked the IO off on those dirty inodes.
1029 */ 1033 */
1030static int 1034void
1031xfs_reclaim_inode_shrink( 1035xfs_reclaim_inodes_nr(
1032 struct shrinker *shrink, 1036 struct xfs_mount *mp,
1033 struct shrink_control *sc) 1037 int nr_to_scan)
1034{ 1038{
1035 struct xfs_mount *mp; 1039 /* kick background reclaimer and push the AIL */
1036 struct xfs_perag *pag; 1040 xfs_syncd_queue_reclaim(mp);
1037 xfs_agnumber_t ag; 1041 xfs_ail_push_all(mp->m_ail);
1038 int reclaimable;
1039 int nr_to_scan = sc->nr_to_scan;
1040 gfp_t gfp_mask = sc->gfp_mask;
1041
1042 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1043 if (nr_to_scan) {
1044 /* kick background reclaimer and push the AIL */
1045 xfs_syncd_queue_reclaim(mp);
1046 xfs_ail_push_all(mp->m_ail);
1047 1042
1048 if (!(gfp_mask & __GFP_FS)) 1043 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
1049 return -1; 1044}
1050 1045
1051 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, 1046/*
1052 &nr_to_scan); 1047 * Return the number of reclaimable inodes in the filesystem for
1053 /* terminate if we don't exhaust the scan */ 1048 * the shrinker to determine how much to reclaim.
1054 if (nr_to_scan > 0) 1049 */
1055 return -1; 1050int
1056 } 1051xfs_reclaim_inodes_count(
1052 struct xfs_mount *mp)
1053{
1054 struct xfs_perag *pag;
1055 xfs_agnumber_t ag = 0;
1056 int reclaimable = 0;
1057 1057
1058 reclaimable = 0;
1059 ag = 0;
1060 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { 1058 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
1061 ag = pag->pag_agno + 1; 1059 ag = pag->pag_agno + 1;
1062 reclaimable += pag->pag_ici_reclaimable; 1060 reclaimable += pag->pag_ici_reclaimable;
@@ -1065,18 +1063,3 @@ xfs_reclaim_inode_shrink(
1065 return reclaimable; 1063 return reclaimable;
1066} 1064}
1067 1065
1068void
1069xfs_inode_shrinker_register(
1070 struct xfs_mount *mp)
1071{
1072 mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
1073 mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
1074 register_shrinker(&mp->m_inode_shrink);
1075}
1076
1077void
1078xfs_inode_shrinker_unregister(
1079 struct xfs_mount *mp)
1080{
1081 unregister_shrinker(&mp->m_inode_shrink);
1082}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e914fd621746..941202e7ac6e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -35,6 +35,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
35void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
36 36
37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
38int xfs_reclaim_inodes_count(struct xfs_mount *mp);
39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
38 40
39void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 41void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
40void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); 42void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
@@ -46,7 +48,4 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
46 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 48 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
47 int flags); 49 int flags);
48 50
49void xfs_inode_shrinker_register(struct xfs_mount *mp);
50void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
51
52#endif 51#endif
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 11dd72070cbb..0135e2a669d7 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
44#ifdef CONFIG_XFS_POSIX_ACL 44#ifdef CONFIG_XFS_POSIX_ACL
45extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags); 45extern int xfs_check_acl(struct inode *inode, int mask);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); 47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode); 48extern int xfs_acl_chmod(struct inode *inode);
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 69a21e0ebd33..8013a45242fe 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -8,6 +8,8 @@
8#ifndef _LINUX_ANON_INODES_H 8#ifndef _LINUX_ANON_INODES_H
9#define _LINUX_ANON_INODES_H 9#define _LINUX_ANON_INODES_H
10 10
11struct file_operations;
12
11struct file *anon_inode_getfile(const char *name, 13struct file *anon_inode_getfile(const char *name,
12 const struct file_operations *fops, 14 const struct file_operations *fops,
13 void *priv, int flags); 15 void *priv, int flags);
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index ee456c79b0e6..bc6615d4132b 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -34,6 +34,32 @@ static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint)
34} 34}
35#endif 35#endif
36 36
37#ifndef atomic_inc_unless_negative
38static inline int atomic_inc_unless_negative(atomic_t *p)
39{
40 int v, v1;
41 for (v = 0; v >= 0; v = v1) {
42 v1 = atomic_cmpxchg(p, v, v + 1);
43 if (likely(v1 == v))
44 return 1;
45 }
46 return 0;
47}
48#endif
49
50#ifndef atomic_dec_unless_positive
51static inline int atomic_dec_unless_positive(atomic_t *p)
52{
53 int v, v1;
54 for (v = 0; v <= 0; v = v1) {
55 v1 = atomic_cmpxchg(p, v, v - 1);
56 if (likely(v1 == v))
57 return 1;
58 }
59 return 0;
60}
61#endif
62
37#ifndef CONFIG_ARCH_HAS_ATOMIC_OR 63#ifndef CONFIG_ARCH_HAS_ATOMIC_OR
38static inline void atomic_or(int i, atomic_t *v) 64static inline void atomic_or(int i, atomic_t *v)
39{ 65{
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 8845613fd7e3..fd88a3945aa1 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -111,6 +111,7 @@ extern int __must_check remove_arg_zero(struct linux_binprm *);
111extern int search_binary_handler(struct linux_binprm *, struct pt_regs *); 111extern int search_binary_handler(struct linux_binprm *, struct pt_regs *);
112extern int flush_old_exec(struct linux_binprm * bprm); 112extern int flush_old_exec(struct linux_binprm * bprm);
113extern void setup_new_exec(struct linux_binprm * bprm); 113extern void setup_new_exec(struct linux_binprm * bprm);
114extern void would_dump(struct linux_binprm *, struct file *);
114 115
115extern int suid_dumpable; 116extern int suid_dumpable;
116#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ 117#define SUID_DUMP_DISABLE 0 /* No setuid dumping */
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 19d90a55541d..3f22d8d6d8a3 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -216,6 +216,7 @@ struct dentry_operations {
216#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */ 216#define DCACHE_MOUNTED 0x10000 /* is a mountpoint */
217#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */ 217#define DCACHE_NEED_AUTOMOUNT 0x20000 /* handle automount on this dir */
218#define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */ 218#define DCACHE_MANAGE_TRANSIT 0x40000 /* manage transit from this dirent */
219#define DCACHE_NEED_LOOKUP 0x80000 /* dentry requires i_op->lookup */
219#define DCACHE_MANAGED_DENTRY \ 220#define DCACHE_MANAGED_DENTRY \
220 (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) 221 (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
221 222
@@ -416,7 +417,12 @@ static inline bool d_mountpoint(struct dentry *dentry)
416 return dentry->d_flags & DCACHE_MOUNTED; 417 return dentry->d_flags & DCACHE_MOUNTED;
417} 418}
418 419
419extern struct dentry *lookup_create(struct nameidata *nd, int is_dir); 420static inline bool d_need_lookup(struct dentry *dentry)
421{
422 return dentry->d_flags & DCACHE_NEED_LOOKUP;
423}
424
425extern void d_clear_need_lookup(struct dentry *dentry);
420 426
421extern int sysctl_vfs_cache_pressure; 427extern int sysctl_vfs_cache_pressure;
422 428
diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h
index 5e06acf95d0f..0c473fd79acb 100644
--- a/include/linux/ext3_fs.h
+++ b/include/linux/ext3_fs.h
@@ -877,7 +877,7 @@ extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
877extern void ext3_htree_free_dir_info(struct dir_private_info *p); 877extern void ext3_htree_free_dir_info(struct dir_private_info *p);
878 878
879/* fsync.c */ 879/* fsync.c */
880extern int ext3_sync_file(struct file *, int); 880extern int ext3_sync_file(struct file *, loff_t, loff_t, int);
881 881
882/* hash.c */ 882/* hash.c */
883extern int ext3fs_dirhash(const char *name, int len, struct 883extern int ext3fs_dirhash(const char *name, int len, struct
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 6a8274877171..1d6836c498dd 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -1043,7 +1043,8 @@ extern void fb_deferred_io_open(struct fb_info *info,
1043 struct inode *inode, 1043 struct inode *inode,
1044 struct file *file); 1044 struct file *file);
1045extern void fb_deferred_io_cleanup(struct fb_info *info); 1045extern void fb_deferred_io_cleanup(struct fb_info *info);
1046extern int fb_deferred_io_fsync(struct file *file, int datasync); 1046extern int fb_deferred_io_fsync(struct file *file, loff_t start,
1047 loff_t end, int datasync);
1047 1048
1048static inline bool fb_be_math(struct fb_info *info) 1049static inline bool fb_be_math(struct fb_info *info)
1049{ 1050{
diff --git a/include/linux/fs.h b/include/linux/fs.h
index b5b979247863..b224dc468a23 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -32,7 +32,9 @@
32#define SEEK_SET 0 /* seek relative to beginning of file */ 32#define SEEK_SET 0 /* seek relative to beginning of file */
33#define SEEK_CUR 1 /* seek relative to current file position */ 33#define SEEK_CUR 1 /* seek relative to current file position */
34#define SEEK_END 2 /* seek relative to end of file */ 34#define SEEK_END 2 /* seek relative to end of file */
35#define SEEK_MAX SEEK_END 35#define SEEK_DATA 3 /* seek to the next data */
36#define SEEK_HOLE 4 /* seek to the next hole */
37#define SEEK_MAX SEEK_HOLE
36 38
37struct fstrim_range { 39struct fstrim_range {
38 __u64 start; 40 __u64 start;
@@ -63,6 +65,7 @@ struct inodes_stat_t {
63#define MAY_ACCESS 16 65#define MAY_ACCESS 16
64#define MAY_OPEN 32 66#define MAY_OPEN 32
65#define MAY_CHDIR 64 67#define MAY_CHDIR 64
68#define MAY_NOT_BLOCK 128 /* called from RCU mode, don't block */
66 69
67/* 70/*
68 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond 71 * flags in file.f_mode. Note that FMODE_READ and FMODE_WRITE must correspond
@@ -392,8 +395,9 @@ struct inodes_stat_t {
392#include <linux/semaphore.h> 395#include <linux/semaphore.h>
393#include <linux/fiemap.h> 396#include <linux/fiemap.h>
394#include <linux/rculist_bl.h> 397#include <linux/rculist_bl.h>
398#include <linux/shrinker.h>
399#include <linux/atomic.h>
395 400
396#include <asm/atomic.h>
397#include <asm/byteorder.h> 401#include <asm/byteorder.h>
398 402
399struct export_operations; 403struct export_operations;
@@ -777,7 +781,7 @@ struct inode {
777 struct timespec i_ctime; 781 struct timespec i_ctime;
778 blkcnt_t i_blocks; 782 blkcnt_t i_blocks;
779 unsigned short i_bytes; 783 unsigned short i_bytes;
780 struct rw_semaphore i_alloc_sem; 784 atomic_t i_dio_count;
781 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ 785 const struct file_operations *i_fop; /* former ->i_op->default_file_ops */
782 struct file_lock *i_flock; 786 struct file_lock *i_flock;
783 struct address_space *i_mapping; 787 struct address_space *i_mapping;
@@ -1396,6 +1400,11 @@ struct super_block {
1396 struct list_head s_dentry_lru; /* unused dentry lru */ 1400 struct list_head s_dentry_lru; /* unused dentry lru */
1397 int s_nr_dentry_unused; /* # of dentry on lru */ 1401 int s_nr_dentry_unused; /* # of dentry on lru */
1398 1402
1403 /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
1404 spinlock_t s_inode_lru_lock ____cacheline_aligned_in_smp;
1405 struct list_head s_inode_lru; /* unused inode lru */
1406 int s_nr_inodes_unused; /* # of inodes on lru */
1407
1399 struct block_device *s_bdev; 1408 struct block_device *s_bdev;
1400 struct backing_dev_info *s_bdi; 1409 struct backing_dev_info *s_bdi;
1401 struct mtd_info *s_mtd; 1410 struct mtd_info *s_mtd;
@@ -1438,8 +1447,14 @@ struct super_block {
1438 * Saved pool identifier for cleancache (-1 means none) 1447 * Saved pool identifier for cleancache (-1 means none)
1439 */ 1448 */
1440 int cleancache_poolid; 1449 int cleancache_poolid;
1450
1451 struct shrinker s_shrink; /* per-sb shrinker handle */
1441}; 1452};
1442 1453
1454/* superblock cache pruning functions */
1455extern void prune_icache_sb(struct super_block *sb, int nr_to_scan);
1456extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan);
1457
1443extern struct timespec current_fs_time(struct super_block *sb); 1458extern struct timespec current_fs_time(struct super_block *sb);
1444 1459
1445/* 1460/*
@@ -1490,7 +1505,6 @@ extern void dentry_unhash(struct dentry *dentry);
1490/* 1505/*
1491 * VFS file helper functions. 1506 * VFS file helper functions.
1492 */ 1507 */
1493extern int file_permission(struct file *, int);
1494extern void inode_init_owner(struct inode *inode, const struct inode *dir, 1508extern void inode_init_owner(struct inode *inode, const struct inode *dir,
1495 mode_t mode); 1509 mode_t mode);
1496/* 1510/*
@@ -1538,11 +1552,6 @@ struct block_device_operations;
1538#define HAVE_COMPAT_IOCTL 1 1552#define HAVE_COMPAT_IOCTL 1
1539#define HAVE_UNLOCKED_IOCTL 1 1553#define HAVE_UNLOCKED_IOCTL 1
1540 1554
1541/*
1542 * NOTE:
1543 * all file operations except setlease can be called without
1544 * the big kernel lock held in all filesystems.
1545 */
1546struct file_operations { 1555struct file_operations {
1547 struct module *owner; 1556 struct module *owner;
1548 loff_t (*llseek) (struct file *, loff_t, int); 1557 loff_t (*llseek) (struct file *, loff_t, int);
@@ -1558,7 +1567,7 @@ struct file_operations {
1558 int (*open) (struct inode *, struct file *); 1567 int (*open) (struct inode *, struct file *);
1559 int (*flush) (struct file *, fl_owner_t id); 1568 int (*flush) (struct file *, fl_owner_t id);
1560 int (*release) (struct inode *, struct file *); 1569 int (*release) (struct inode *, struct file *);
1561 int (*fsync) (struct file *, int datasync); 1570 int (*fsync) (struct file *, loff_t, loff_t, int datasync);
1562 int (*aio_fsync) (struct kiocb *, int datasync); 1571 int (*aio_fsync) (struct kiocb *, int datasync);
1563 int (*fasync) (int, struct file *, int); 1572 int (*fasync) (int, struct file *, int);
1564 int (*lock) (struct file *, int, struct file_lock *); 1573 int (*lock) (struct file *, int, struct file_lock *);
@@ -1573,13 +1582,11 @@ struct file_operations {
1573 loff_t len); 1582 loff_t len);
1574}; 1583};
1575 1584
1576#define IPERM_FLAG_RCU 0x0001
1577
1578struct inode_operations { 1585struct inode_operations {
1579 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); 1586 struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
1580 void * (*follow_link) (struct dentry *, struct nameidata *); 1587 void * (*follow_link) (struct dentry *, struct nameidata *);
1581 int (*permission) (struct inode *, int, unsigned int); 1588 int (*permission) (struct inode *, int);
1582 int (*check_acl)(struct inode *, int, unsigned int); 1589 int (*check_acl)(struct inode *, int);
1583 1590
1584 int (*readlink) (struct dentry *, char __user *,int); 1591 int (*readlink) (struct dentry *, char __user *,int);
1585 void (*put_link) (struct dentry *, struct nameidata *, void *); 1592 void (*put_link) (struct dentry *, struct nameidata *, void *);
@@ -1645,6 +1652,8 @@ struct super_operations {
1645 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); 1652 ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
1646#endif 1653#endif
1647 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); 1654 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
1655 int (*nr_cached_objects)(struct super_block *);
1656 void (*free_cached_objects)(struct super_block *, int);
1648}; 1657};
1649 1658
1650/* 1659/*
@@ -1693,6 +1702,10 @@ struct super_operations {
1693 * set during data writeback, and cleared with a wakeup 1702 * set during data writeback, and cleared with a wakeup
1694 * on the bit address once it is done. 1703 * on the bit address once it is done.
1695 * 1704 *
1705 * I_REFERENCED Marks the inode as recently referenced on the LRU list.
1706 *
1707 * I_DIO_WAKEUP Never set. Only used as a key for wait_on_bit().
1708 *
1696 * Q: What is the difference between I_WILL_FREE and I_FREEING? 1709 * Q: What is the difference between I_WILL_FREE and I_FREEING?
1697 */ 1710 */
1698#define I_DIRTY_SYNC (1 << 0) 1711#define I_DIRTY_SYNC (1 << 0)
@@ -1706,6 +1719,8 @@ struct super_operations {
1706#define __I_SYNC 7 1719#define __I_SYNC 7
1707#define I_SYNC (1 << __I_SYNC) 1720#define I_SYNC (1 << __I_SYNC)
1708#define I_REFERENCED (1 << 8) 1721#define I_REFERENCED (1 << 8)
1722#define __I_DIO_WAKEUP 9
1723#define I_DIO_WAKEUP (1 << I_DIO_WAKEUP)
1709 1724
1710#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) 1725#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
1711 1726
@@ -1816,7 +1831,6 @@ struct file_system_type {
1816 struct lock_class_key i_lock_key; 1831 struct lock_class_key i_lock_key;
1817 struct lock_class_key i_mutex_key; 1832 struct lock_class_key i_mutex_key;
1818 struct lock_class_key i_mutex_dir_key; 1833 struct lock_class_key i_mutex_dir_key;
1819 struct lock_class_key i_alloc_sem_key;
1820}; 1834};
1821 1835
1822extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, 1836extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
@@ -1837,6 +1851,8 @@ void kill_litter_super(struct super_block *sb);
1837void deactivate_super(struct super_block *sb); 1851void deactivate_super(struct super_block *sb);
1838void deactivate_locked_super(struct super_block *sb); 1852void deactivate_locked_super(struct super_block *sb);
1839int set_anon_super(struct super_block *s, void *data); 1853int set_anon_super(struct super_block *s, void *data);
1854int get_anon_bdev(dev_t *);
1855void free_anon_bdev(dev_t);
1840struct super_block *sget(struct file_system_type *type, 1856struct super_block *sget(struct file_system_type *type,
1841 int (*test)(struct super_block *,void *), 1857 int (*test)(struct super_block *,void *),
1842 int (*set)(struct super_block *,void *), 1858 int (*set)(struct super_block *,void *),
@@ -2188,16 +2204,38 @@ extern sector_t bmap(struct inode *, sector_t);
2188#endif 2204#endif
2189extern int notify_change(struct dentry *, struct iattr *); 2205extern int notify_change(struct dentry *, struct iattr *);
2190extern int inode_permission(struct inode *, int); 2206extern int inode_permission(struct inode *, int);
2191extern int generic_permission(struct inode *, int, unsigned int, 2207extern int generic_permission(struct inode *, int);
2192 int (*check_acl)(struct inode *, int, unsigned int));
2193 2208
2194static inline bool execute_ok(struct inode *inode) 2209static inline bool execute_ok(struct inode *inode)
2195{ 2210{
2196 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); 2211 return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2197} 2212}
2198 2213
2199extern int get_write_access(struct inode *); 2214/*
2200extern int deny_write_access(struct file *); 2215 * get_write_access() gets write permission for a file.
2216 * put_write_access() releases this write permission.
2217 * This is used for regular files.
2218 * We cannot support write (and maybe mmap read-write shared) accesses and
2219 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
2220 * can have the following values:
2221 * 0: no writers, no VM_DENYWRITE mappings
2222 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
2223 * > 0: (i_writecount) users are writing to the file.
2224 *
2225 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
2226 * except for the cases where we don't hold i_writecount yet. Then we need to
2227 * use {get,deny}_write_access() - these functions check the sign and refuse
2228 * to do the change if sign is wrong.
2229 */
2230static inline int get_write_access(struct inode *inode)
2231{
2232 return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
2233}
2234static inline int deny_write_access(struct file *file)
2235{
2236 struct inode *inode = file->f_path.dentry->d_inode;
2237 return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
2238}
2201static inline void put_write_access(struct inode * inode) 2239static inline void put_write_access(struct inode * inode)
2202{ 2240{
2203 atomic_dec(&inode->i_writecount); 2241 atomic_dec(&inode->i_writecount);
@@ -2317,7 +2355,8 @@ extern int generic_segment_checks(const struct iovec *iov,
2317/* fs/block_dev.c */ 2355/* fs/block_dev.c */
2318extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, 2356extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
2319 unsigned long nr_segs, loff_t pos); 2357 unsigned long nr_segs, loff_t pos);
2320extern int blkdev_fsync(struct file *filp, int datasync); 2358extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
2359 int datasync);
2321 2360
2322/* fs/splice.c */ 2361/* fs/splice.c */
2323extern ssize_t generic_file_splice_read(struct file *, loff_t *, 2362extern ssize_t generic_file_splice_read(struct file *, loff_t *,
@@ -2368,6 +2407,8 @@ enum {
2368}; 2407};
2369 2408
2370void dio_end_io(struct bio *bio, int error); 2409void dio_end_io(struct bio *bio, int error);
2410void inode_dio_wait(struct inode *inode);
2411void inode_dio_done(struct inode *inode);
2371 2412
2372ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2413ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2373 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2414 struct block_device *bdev, const struct iovec *iov, loff_t offset,
@@ -2375,14 +2416,17 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2375 dio_submit_t submit_io, int flags); 2416 dio_submit_t submit_io, int flags);
2376 2417
2377static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, 2418static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2378 struct inode *inode, struct block_device *bdev, const struct iovec *iov, 2419 struct inode *inode, const struct iovec *iov, loff_t offset,
2379 loff_t offset, unsigned long nr_segs, get_block_t get_block, 2420 unsigned long nr_segs, get_block_t get_block)
2380 dio_iodone_t end_io)
2381{ 2421{
2382 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2422 return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2383 nr_segs, get_block, end_io, NULL, 2423 offset, nr_segs, get_block, NULL, NULL,
2384 DIO_LOCKING | DIO_SKIP_HOLES); 2424 DIO_LOCKING | DIO_SKIP_HOLES);
2385} 2425}
2426#else
2427static inline void inode_dio_wait(struct inode *inode)
2428{
2429}
2386#endif 2430#endif
2387 2431
2388extern const struct file_operations generic_ro_fops; 2432extern const struct file_operations generic_ro_fops;
@@ -2432,6 +2476,8 @@ extern struct super_block *get_active_super(struct block_device *bdev);
2432extern struct super_block *user_get_super(dev_t); 2476extern struct super_block *user_get_super(dev_t);
2433extern void drop_super(struct super_block *sb); 2477extern void drop_super(struct super_block *sb);
2434extern void iterate_supers(void (*)(struct super_block *, void *), void *); 2478extern void iterate_supers(void (*)(struct super_block *, void *), void *);
2479extern void iterate_supers_type(struct file_system_type *,
2480 void (*)(struct super_block *, void *), void *);
2435 2481
2436extern int dcache_dir_open(struct inode *, struct file *); 2482extern int dcache_dir_open(struct inode *, struct file *);
2437extern int dcache_dir_close(struct inode *, struct file *); 2483extern int dcache_dir_close(struct inode *, struct file *);
@@ -2444,7 +2490,7 @@ extern int simple_link(struct dentry *, struct inode *, struct dentry *);
2444extern int simple_unlink(struct inode *, struct dentry *); 2490extern int simple_unlink(struct inode *, struct dentry *);
2445extern int simple_rmdir(struct inode *, struct dentry *); 2491extern int simple_rmdir(struct inode *, struct dentry *);
2446extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); 2492extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
2447extern int noop_fsync(struct file *, int); 2493extern int noop_fsync(struct file *, loff_t, loff_t, int);
2448extern int simple_empty(struct dentry *); 2494extern int simple_empty(struct dentry *);
2449extern int simple_readpage(struct file *file, struct page *page); 2495extern int simple_readpage(struct file *file, struct page *page);
2450extern int simple_write_begin(struct file *file, struct address_space *mapping, 2496extern int simple_write_begin(struct file *file, struct address_space *mapping,
@@ -2469,7 +2515,7 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
2469extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 2515extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
2470 const void __user *from, size_t count); 2516 const void __user *from, size_t count);
2471 2517
2472extern int generic_file_fsync(struct file *, int); 2518extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
2473 2519
2474extern int generic_check_addressable(unsigned, u64); 2520extern int generic_check_addressable(unsigned, u64);
2475 2521
diff --git a/include/linux/generic_acl.h b/include/linux/generic_acl.h
index 0437e377b555..574bea4013b6 100644
--- a/include/linux/generic_acl.h
+++ b/include/linux/generic_acl.h
@@ -10,6 +10,6 @@ extern const struct xattr_handler generic_acl_default_handler;
10 10
11int generic_acl_init(struct inode *, struct inode *); 11int generic_acl_init(struct inode *, struct inode *);
12int generic_acl_chmod(struct inode *); 12int generic_acl_chmod(struct inode *);
13int generic_check_acl(struct inode *inode, int mask, unsigned int flags); 13int generic_check_acl(struct inode *inode, int mask);
14 14
15#endif /* LINUX_GENERIC_ACL_H */ 15#endif /* LINUX_GENERIC_ACL_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c70a326b8f26..8a45ad22a170 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
15#include <linux/range.h> 15#include <linux/range.h>
16#include <linux/pfn.h> 16#include <linux/pfn.h>
17#include <linux/bit_spinlock.h> 17#include <linux/bit_spinlock.h>
18#include <linux/shrinker.h>
18 19
19struct mempolicy; 20struct mempolicy;
20struct anon_vma; 21struct anon_vma;
@@ -1121,44 +1122,6 @@ static inline void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
1121} 1122}
1122#endif 1123#endif
1123 1124
1124/*
1125 * This struct is used to pass information from page reclaim to the shrinkers.
1126 * We consolidate the values for easier extention later.
1127 */
1128struct shrink_control {
1129 gfp_t gfp_mask;
1130
1131 /* How many slab objects shrinker() should scan and try to reclaim */
1132 unsigned long nr_to_scan;
1133};
1134
1135/*
1136 * A callback you can register to apply pressure to ageable caches.
1137 *
1138 * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
1139 * and a 'gfpmask'. It should look through the least-recently-used
1140 * 'nr_to_scan' entries and attempt to free them up. It should return
1141 * the number of objects which remain in the cache. If it returns -1, it means
1142 * it cannot do any scanning at this time (eg. there is a risk of deadlock).
1143 *
1144 * The 'gfpmask' refers to the allocation we are currently trying to
1145 * fulfil.
1146 *
1147 * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
1148 * querying the cache size, so a fastpath for that case is appropriate.
1149 */
1150struct shrinker {
1151 int (*shrink)(struct shrinker *, struct shrink_control *sc);
1152 int seeks; /* seeks to recreate an obj */
1153
1154 /* These are for internal use */
1155 struct list_head list;
1156 long nr; /* objs pending delete */
1157};
1158#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
1159extern void register_shrinker(struct shrinker *);
1160extern void unregister_shrinker(struct shrinker *);
1161
1162int vma_wants_writenotify(struct vm_area_struct *vma); 1125int vma_wants_writenotify(struct vm_area_struct *vma);
1163 1126
1164extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, 1127extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
index 0b89efc6f215..29304855652d 100644
--- a/include/linux/mnt_namespace.h
+++ b/include/linux/mnt_namespace.h
@@ -18,7 +18,6 @@ struct proc_mounts {
18 struct seq_file m; /* must be the first element */ 18 struct seq_file m; /* must be the first element */
19 struct mnt_namespace *ns; 19 struct mnt_namespace *ns;
20 struct path root; 20 struct path root;
21 int event;
22}; 21};
23 22
24struct fs_struct; 23struct fs_struct;
diff --git a/include/linux/namei.h b/include/linux/namei.h
index eba45ea10298..76fe2c62ae71 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -48,7 +48,6 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
48 */ 48 */
49#define LOOKUP_FOLLOW 0x0001 49#define LOOKUP_FOLLOW 0x0001
50#define LOOKUP_DIRECTORY 0x0002 50#define LOOKUP_DIRECTORY 0x0002
51#define LOOKUP_CONTINUE 0x0004
52 51
53#define LOOKUP_PARENT 0x0010 52#define LOOKUP_PARENT 0x0010
54#define LOOKUP_REVAL 0x0020 53#define LOOKUP_REVAL 0x0020
@@ -75,9 +74,11 @@ extern int user_path_at(int, const char __user *, unsigned, struct path *);
75 74
76extern int kern_path(const char *, unsigned, struct path *); 75extern int kern_path(const char *, unsigned, struct path *);
77 76
77extern struct dentry *kern_path_create(int, const char *, struct path *, int);
78extern struct dentry *user_path_create(int, const char __user *, struct path *, int);
78extern int kern_path_parent(const char *, struct nameidata *); 79extern int kern_path_parent(const char *, struct nameidata *);
79extern int vfs_path_lookup(struct dentry *, struct vfsmount *, 80extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
80 const char *, unsigned int, struct nameidata *); 81 const char *, unsigned int, struct path *);
81 82
82extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 83extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry,
83 int (*open)(struct inode *, struct file *)); 84 int (*open)(struct inode *, struct file *));
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1b93b9c60e55..8b579beb6358 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -85,7 +85,7 @@ struct nfs_lock_context {
85struct nfs4_state; 85struct nfs4_state;
86struct nfs_open_context { 86struct nfs_open_context {
87 struct nfs_lock_context lock_context; 87 struct nfs_lock_context lock_context;
88 struct path path; 88 struct dentry *dentry;
89 struct rpc_cred *cred; 89 struct rpc_cred *cred;
90 struct nfs4_state *state; 90 struct nfs4_state *state;
91 fmode_t mode; 91 fmode_t mode;
@@ -360,7 +360,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
360extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); 360extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
361extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); 361extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
362extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 362extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
363extern int nfs_permission(struct inode *, int, unsigned int); 363extern int nfs_permission(struct inode *, int);
364extern int nfs_open(struct inode *, struct file *); 364extern int nfs_open(struct inode *, struct file *);
365extern int nfs_release(struct inode *, struct file *); 365extern int nfs_release(struct inode *, struct file *);
366extern int nfs_attribute_timeout(struct inode *inode); 366extern int nfs_attribute_timeout(struct inode *inode);
@@ -372,7 +372,7 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
372extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); 372extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
373extern void put_nfs_open_context(struct nfs_open_context *ctx); 373extern void put_nfs_open_context(struct nfs_open_context *ctx);
374extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode); 374extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
375extern struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode); 375extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode);
376extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); 376extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
377extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx); 377extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
378extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx); 378extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index 50d20aba57d3..cc37a55ad004 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -68,6 +68,7 @@ void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
68void free_nsproxy(struct nsproxy *ns); 68void free_nsproxy(struct nsproxy *ns);
69int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **, 69int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
70 struct fs_struct *); 70 struct fs_struct *);
71int __init nsproxy_cache_init(void);
71 72
72static inline void put_nsproxy(struct nsproxy *ns) 73static inline void put_nsproxy(struct nsproxy *ns)
73{ 74{
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index 6deef5dc95fb..57958c0e1d38 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -41,10 +41,11 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
41int reiserfs_lookup_privroot(struct super_block *sb); 41int reiserfs_lookup_privroot(struct super_block *sb);
42int reiserfs_delete_xattrs(struct inode *inode); 42int reiserfs_delete_xattrs(struct inode *inode);
43int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs); 43int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
44int reiserfs_permission(struct inode *inode, int mask, unsigned int flags); 44int reiserfs_permission(struct inode *inode, int mask);
45 45
46#ifdef CONFIG_REISERFS_FS_XATTR 46#ifdef CONFIG_REISERFS_FS_XATTR
47#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) 47#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
48int reiserfs_check_acl(struct inode *inode, int mask);
48ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name, 49ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name,
49 void *buffer, size_t size); 50 void *buffer, size_t size);
50int reiserfs_setxattr(struct dentry *dentry, const char *name, 51int reiserfs_setxattr(struct dentry *dentry, const char *name,
@@ -122,6 +123,7 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
122#define reiserfs_setxattr NULL 123#define reiserfs_setxattr NULL
123#define reiserfs_listxattr NULL 124#define reiserfs_listxattr NULL
124#define reiserfs_removexattr NULL 125#define reiserfs_removexattr NULL
126#define reiserfs_check_acl NULL
125 127
126static inline void reiserfs_init_xattr_rwsem(struct inode *inode) 128static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
127{ 129{
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index a8afe9cd000c..77950dfa0a9e 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -124,19 +124,9 @@ extern void downgrade_write(struct rw_semaphore *sem);
124 */ 124 */
125extern void down_read_nested(struct rw_semaphore *sem, int subclass); 125extern void down_read_nested(struct rw_semaphore *sem, int subclass);
126extern void down_write_nested(struct rw_semaphore *sem, int subclass); 126extern void down_write_nested(struct rw_semaphore *sem, int subclass);
127/*
128 * Take/release a lock when not the owner will release it.
129 *
130 * [ This API should be avoided as much as possible - the
131 * proper abstraction for this case is completions. ]
132 */
133extern void down_read_non_owner(struct rw_semaphore *sem);
134extern void up_read_non_owner(struct rw_semaphore *sem);
135#else 127#else
136# define down_read_nested(sem, subclass) down_read(sem) 128# define down_read_nested(sem, subclass) down_read(sem)
137# define down_write_nested(sem, subclass) down_write(sem) 129# define down_write_nested(sem, subclass) down_write(sem)
138# define down_read_non_owner(sem) down_read(sem)
139# define up_read_non_owner(sem) up_read(sem)
140#endif 130#endif
141 131
142#endif /* _LINUX_RWSEM_H */ 132#endif /* _LINUX_RWSEM_H */
diff --git a/include/linux/security.h b/include/linux/security.h
index 8ce59ef3e5af..ebd2a53a3d07 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1456,7 +1456,7 @@ struct security_operations {
1456 struct inode *new_dir, struct dentry *new_dentry); 1456 struct inode *new_dir, struct dentry *new_dentry);
1457 int (*inode_readlink) (struct dentry *dentry); 1457 int (*inode_readlink) (struct dentry *dentry);
1458 int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); 1458 int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd);
1459 int (*inode_permission) (struct inode *inode, int mask, unsigned flags); 1459 int (*inode_permission) (struct inode *inode, int mask);
1460 int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); 1460 int (*inode_setattr) (struct dentry *dentry, struct iattr *attr);
1461 int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); 1461 int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
1462 int (*inode_setxattr) (struct dentry *dentry, const char *name, 1462 int (*inode_setxattr) (struct dentry *dentry, const char *name,
@@ -1720,7 +1720,6 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
1720int security_inode_readlink(struct dentry *dentry); 1720int security_inode_readlink(struct dentry *dentry);
1721int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); 1721int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
1722int security_inode_permission(struct inode *inode, int mask); 1722int security_inode_permission(struct inode *inode, int mask);
1723int security_inode_exec_permission(struct inode *inode, unsigned int flags);
1724int security_inode_setattr(struct dentry *dentry, struct iattr *attr); 1723int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
1725int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry); 1724int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
1726int security_inode_setxattr(struct dentry *dentry, const char *name, 1725int security_inode_setxattr(struct dentry *dentry, const char *name,
@@ -2113,12 +2112,6 @@ static inline int security_inode_permission(struct inode *inode, int mask)
2113 return 0; 2112 return 0;
2114} 2113}
2115 2114
2116static inline int security_inode_exec_permission(struct inode *inode,
2117 unsigned int flags)
2118{
2119 return 0;
2120}
2121
2122static inline int security_inode_setattr(struct dentry *dentry, 2115static inline int security_inode_setattr(struct dentry *dentry,
2123 struct iattr *attr) 2116 struct iattr *attr)
2124{ 2117{
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 03c0232b4169..be720cd2038d 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -23,6 +23,7 @@ struct seq_file {
23 u64 version; 23 u64 version;
24 struct mutex lock; 24 struct mutex lock;
25 const struct seq_operations *op; 25 const struct seq_operations *op;
26 int poll_event;
26 void *private; 27 void *private;
27}; 28};
28 29
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
new file mode 100644
index 000000000000..790651b4e5ba
--- /dev/null
+++ b/include/linux/shrinker.h
@@ -0,0 +1,42 @@
1#ifndef _LINUX_SHRINKER_H
2#define _LINUX_SHRINKER_H
3
4/*
5 * This struct is used to pass information from page reclaim to the shrinkers.
6 * We consolidate the values for easier extention later.
7 */
8struct shrink_control {
9 gfp_t gfp_mask;
10
11 /* How many slab objects shrinker() should scan and try to reclaim */
12 unsigned long nr_to_scan;
13};
14
15/*
16 * A callback you can register to apply pressure to ageable caches.
17 *
18 * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
19 * and a 'gfpmask'. It should look through the least-recently-used
20 * 'nr_to_scan' entries and attempt to free them up. It should return
21 * the number of objects which remain in the cache. If it returns -1, it means
22 * it cannot do any scanning at this time (eg. there is a risk of deadlock).
23 *
24 * The 'gfpmask' refers to the allocation we are currently trying to
25 * fulfil.
26 *
27 * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
28 * querying the cache size, so a fastpath for that case is appropriate.
29 */
30struct shrinker {
31 int (*shrink)(struct shrinker *, struct shrink_control *sc);
32 int seeks; /* seeks to recreate an obj */
33 long batch; /* reclaim batch size, 0 = default */
34
35 /* These are for internal use */
36 struct list_head list;
37 long nr; /* objs pending delete */
38};
39#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
40extern void register_shrinker(struct shrinker *);
41extern void unregister_shrinker(struct shrinker *);
42#endif
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index b2c33bd955fa..36851f7f13da 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -179,6 +179,83 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_memcg_softlimit_re
179 TP_ARGS(nr_reclaimed) 179 TP_ARGS(nr_reclaimed)
180); 180);
181 181
182TRACE_EVENT(mm_shrink_slab_start,
183 TP_PROTO(struct shrinker *shr, struct shrink_control *sc,
184 long nr_objects_to_shrink, unsigned long pgs_scanned,
185 unsigned long lru_pgs, unsigned long cache_items,
186 unsigned long long delta, unsigned long total_scan),
187
188 TP_ARGS(shr, sc, nr_objects_to_shrink, pgs_scanned, lru_pgs,
189 cache_items, delta, total_scan),
190
191 TP_STRUCT__entry(
192 __field(struct shrinker *, shr)
193 __field(void *, shrink)
194 __field(long, nr_objects_to_shrink)
195 __field(gfp_t, gfp_flags)
196 __field(unsigned long, pgs_scanned)
197 __field(unsigned long, lru_pgs)
198 __field(unsigned long, cache_items)
199 __field(unsigned long long, delta)
200 __field(unsigned long, total_scan)
201 ),
202
203 TP_fast_assign(
204 __entry->shr = shr;
205 __entry->shrink = shr->shrink;
206 __entry->nr_objects_to_shrink = nr_objects_to_shrink;
207 __entry->gfp_flags = sc->gfp_mask;
208 __entry->pgs_scanned = pgs_scanned;
209 __entry->lru_pgs = lru_pgs;
210 __entry->cache_items = cache_items;
211 __entry->delta = delta;
212 __entry->total_scan = total_scan;
213 ),
214
215 TP_printk("%pF %p: objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld",
216 __entry->shrink,
217 __entry->shr,
218 __entry->nr_objects_to_shrink,
219 show_gfp_flags(__entry->gfp_flags),
220 __entry->pgs_scanned,
221 __entry->lru_pgs,
222 __entry->cache_items,
223 __entry->delta,
224 __entry->total_scan)
225);
226
227TRACE_EVENT(mm_shrink_slab_end,
228 TP_PROTO(struct shrinker *shr, int shrinker_retval,
229 long unused_scan_cnt, long new_scan_cnt),
230
231 TP_ARGS(shr, shrinker_retval, unused_scan_cnt, new_scan_cnt),
232
233 TP_STRUCT__entry(
234 __field(struct shrinker *, shr)
235 __field(void *, shrink)
236 __field(long, unused_scan)
237 __field(long, new_scan)
238 __field(int, retval)
239 __field(long, total_scan)
240 ),
241
242 TP_fast_assign(
243 __entry->shr = shr;
244 __entry->shrink = shr->shrink;
245 __entry->unused_scan = unused_scan_cnt;
246 __entry->new_scan = new_scan_cnt;
247 __entry->retval = shrinker_retval;
248 __entry->total_scan = new_scan_cnt - unused_scan_cnt;
249 ),
250
251 TP_printk("%pF %p: unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d",
252 __entry->shrink,
253 __entry->shr,
254 __entry->unused_scan,
255 __entry->new_scan,
256 __entry->total_scan,
257 __entry->retval)
258);
182 259
183DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template, 260DECLARE_EVENT_CLASS(mm_vmscan_lru_isolate_template,
184 261
diff --git a/ipc/shm.c b/ipc/shm.c
index ab3385a21b27..27884adb1a90 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -277,13 +277,13 @@ static int shm_release(struct inode *ino, struct file *file)
277 return 0; 277 return 0;
278} 278}
279 279
280static int shm_fsync(struct file *file, int datasync) 280static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
281{ 281{
282 struct shm_file_data *sfd = shm_file_data(file); 282 struct shm_file_data *sfd = shm_file_data(file);
283 283
284 if (!sfd->file->f_op->fsync) 284 if (!sfd->file->f_op->fsync)
285 return -EINVAL; 285 return -EINVAL;
286 return sfd->file->f_op->fsync(sfd->file, datasync); 286 return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
287} 287}
288 288
289static unsigned long shm_get_unmapped_area(struct file *file, 289static unsigned long shm_get_unmapped_area(struct file *file,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2731d115d725..e1c72c0f512b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3542,7 +3542,8 @@ static int cgroup_write_event_control(struct cgroup *cgrp, struct cftype *cft,
3542 } 3542 }
3543 3543
3544 /* the process need read permission on control file */ 3544 /* the process need read permission on control file */
3545 ret = file_permission(cfile, MAY_READ); 3545 /* AV: shouldn't we check that it's been opened for read instead? */
3546 ret = inode_permission(cfile->f_path.dentry->d_inode, MAY_READ);
3546 if (ret < 0) 3547 if (ret < 0)
3547 goto fail; 3548 goto fail;
3548 3549
diff --git a/kernel/fork.c b/kernel/fork.c
index ca339c5c5819..aeae5b11b62e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1585,6 +1585,7 @@ void __init proc_caches_init(void)
1585 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); 1585 SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
1586 vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); 1586 vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
1587 mmap_init(); 1587 mmap_init();
1588 nsproxy_cache_init();
1588} 1589}
1589 1590
1590/* 1591/*
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index d6a00f3de15d..9aeab4b98c64 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -271,10 +271,8 @@ out:
271 return err; 271 return err;
272} 272}
273 273
274static int __init nsproxy_cache_init(void) 274int __init nsproxy_cache_init(void)
275{ 275{
276 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 276 nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC);
277 return 0; 277 return 0;
278} 278}
279
280module_init(nsproxy_cache_init);
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
index cae050b05f5e..176e5e56ffab 100644
--- a/kernel/rwsem.c
+++ b/kernel/rwsem.c
@@ -117,15 +117,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
117 117
118EXPORT_SYMBOL(down_read_nested); 118EXPORT_SYMBOL(down_read_nested);
119 119
120void down_read_non_owner(struct rw_semaphore *sem)
121{
122 might_sleep();
123
124 __down_read(sem);
125}
126
127EXPORT_SYMBOL(down_read_non_owner);
128
129void down_write_nested(struct rw_semaphore *sem, int subclass) 120void down_write_nested(struct rw_semaphore *sem, int subclass)
130{ 121{
131 might_sleep(); 122 might_sleep();
@@ -136,13 +127,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
136 127
137EXPORT_SYMBOL(down_write_nested); 128EXPORT_SYMBOL(down_write_nested);
138 129
139void up_read_non_owner(struct rw_semaphore *sem)
140{
141 __up_read(sem);
142}
143
144EXPORT_SYMBOL(up_read_non_owner);
145
146#endif 130#endif
147 131
148 132
diff --git a/mm/filemap.c b/mm/filemap.c
index a8251a8d3457..f820e600f1ad 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -78,9 +78,6 @@
78 * ->i_mutex (generic_file_buffered_write) 78 * ->i_mutex (generic_file_buffered_write)
79 * ->mmap_sem (fault_in_pages_readable->do_page_fault) 79 * ->mmap_sem (fault_in_pages_readable->do_page_fault)
80 * 80 *
81 * ->i_mutex
82 * ->i_alloc_sem (various)
83 *
84 * inode_wb_list_lock 81 * inode_wb_list_lock
85 * sb_lock (fs/fs-writeback.c) 82 * sb_lock (fs/fs-writeback.c)
86 * ->mapping->tree_lock (__sync_single_inode) 83 * ->mapping->tree_lock (__sync_single_inode)
diff --git a/mm/madvise.c b/mm/madvise.c
index 2221491ed503..74bf193eff04 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -218,7 +218,7 @@ static long madvise_remove(struct vm_area_struct *vma,
218 endoff = (loff_t)(end - vma->vm_start - 1) 218 endoff = (loff_t)(end - vma->vm_start - 1)
219 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 219 + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
220 220
221 /* vmtruncate_range needs to take i_mutex and i_alloc_sem */ 221 /* vmtruncate_range needs to take i_mutex */
222 up_read(&current->mm->mmap_sem); 222 up_read(&current->mm->mmap_sem);
223 error = vmtruncate_range(mapping->host, offset, endoff); 223 error = vmtruncate_range(mapping->host, offset, endoff);
224 down_read(&current->mm->mmap_sem); 224 down_read(&current->mm->mmap_sem);
diff --git a/mm/rmap.c b/mm/rmap.c
index 23295f65ae43..2540a39eea4a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -21,7 +21,6 @@
21 * Lock ordering in mm: 21 * Lock ordering in mm:
22 * 22 *
23 * inode->i_mutex (while writing or truncating, not reading or faulting) 23 * inode->i_mutex (while writing or truncating, not reading or faulting)
24 * inode->i_alloc_sem (vmtruncate_range)
25 * mm->mmap_sem 24 * mm->mmap_sem
26 * page->flags PG_locked (lock_page) 25 * page->flags PG_locked (lock_page)
27 * mapping->i_mmap_mutex 26 * mapping->i_mmap_mutex
diff --git a/mm/swapfile.c b/mm/swapfile.c
index ff8dc1a18cb4..1b8c33907242 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1681,19 +1681,14 @@ out:
1681} 1681}
1682 1682
1683#ifdef CONFIG_PROC_FS 1683#ifdef CONFIG_PROC_FS
1684struct proc_swaps {
1685 struct seq_file seq;
1686 int event;
1687};
1688
1689static unsigned swaps_poll(struct file *file, poll_table *wait) 1684static unsigned swaps_poll(struct file *file, poll_table *wait)
1690{ 1685{
1691 struct proc_swaps *s = file->private_data; 1686 struct seq_file *seq = file->private_data;
1692 1687
1693 poll_wait(file, &proc_poll_wait, wait); 1688 poll_wait(file, &proc_poll_wait, wait);
1694 1689
1695 if (s->event != atomic_read(&proc_poll_event)) { 1690 if (seq->poll_event != atomic_read(&proc_poll_event)) {
1696 s->event = atomic_read(&proc_poll_event); 1691 seq->poll_event = atomic_read(&proc_poll_event);
1697 return POLLIN | POLLRDNORM | POLLERR | POLLPRI; 1692 return POLLIN | POLLRDNORM | POLLERR | POLLPRI;
1698 } 1693 }
1699 1694
@@ -1783,24 +1778,16 @@ static const struct seq_operations swaps_op = {
1783 1778
1784static int swaps_open(struct inode *inode, struct file *file) 1779static int swaps_open(struct inode *inode, struct file *file)
1785{ 1780{
1786 struct proc_swaps *s; 1781 struct seq_file *seq;
1787 int ret; 1782 int ret;
1788 1783
1789 s = kmalloc(sizeof(struct proc_swaps), GFP_KERNEL);
1790 if (!s)
1791 return -ENOMEM;
1792
1793 file->private_data = s;
1794
1795 ret = seq_open(file, &swaps_op); 1784 ret = seq_open(file, &swaps_op);
1796 if (ret) { 1785 if (ret)
1797 kfree(s);
1798 return ret; 1786 return ret;
1799 }
1800 1787
1801 s->seq.private = s; 1788 seq = file->private_data;
1802 s->event = atomic_read(&proc_poll_event); 1789 seq->poll_event = atomic_read(&proc_poll_event);
1803 return ret; 1790 return 0;
1804} 1791}
1805 1792
1806static const struct file_operations proc_swaps_operations = { 1793static const struct file_operations proc_swaps_operations = {
diff --git a/mm/truncate.c b/mm/truncate.c
index e13f22efaad7..003c6c685fc8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -622,12 +622,11 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
622 return -ENOSYS; 622 return -ENOSYS;
623 623
624 mutex_lock(&inode->i_mutex); 624 mutex_lock(&inode->i_mutex);
625 down_write(&inode->i_alloc_sem); 625 inode_dio_wait(inode);
626 unmap_mapping_range(mapping, offset, (end - offset), 1); 626 unmap_mapping_range(mapping, offset, (end - offset), 1);
627 inode->i_op->truncate_range(inode, offset, end); 627 inode->i_op->truncate_range(inode, offset, end);
628 /* unmap again to remove racily COWed private pages */ 628 /* unmap again to remove racily COWed private pages */
629 unmap_mapping_range(mapping, offset, (end - offset), 1); 629 unmap_mapping_range(mapping, offset, (end - offset), 1);
630 up_write(&inode->i_alloc_sem);
631 mutex_unlock(&inode->i_mutex); 630 mutex_unlock(&inode->i_mutex);
632 631
633 return 0; 632 return 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d036e59d302b..febbc044e792 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -250,49 +250,90 @@ unsigned long shrink_slab(struct shrink_control *shrink,
250 unsigned long long delta; 250 unsigned long long delta;
251 unsigned long total_scan; 251 unsigned long total_scan;
252 unsigned long max_pass; 252 unsigned long max_pass;
253 int shrink_ret = 0;
254 long nr;
255 long new_nr;
256 long batch_size = shrinker->batch ? shrinker->batch
257 : SHRINK_BATCH;
253 258
259 /*
260 * copy the current shrinker scan count into a local variable
261 * and zero it so that other concurrent shrinker invocations
262 * don't also do this scanning work.
263 */
264 do {
265 nr = shrinker->nr;
266 } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
267
268 total_scan = nr;
254 max_pass = do_shrinker_shrink(shrinker, shrink, 0); 269 max_pass = do_shrinker_shrink(shrinker, shrink, 0);
255 delta = (4 * nr_pages_scanned) / shrinker->seeks; 270 delta = (4 * nr_pages_scanned) / shrinker->seeks;
256 delta *= max_pass; 271 delta *= max_pass;
257 do_div(delta, lru_pages + 1); 272 do_div(delta, lru_pages + 1);
258 shrinker->nr += delta; 273 total_scan += delta;
259 if (shrinker->nr < 0) { 274 if (total_scan < 0) {
260 printk(KERN_ERR "shrink_slab: %pF negative objects to " 275 printk(KERN_ERR "shrink_slab: %pF negative objects to "
261 "delete nr=%ld\n", 276 "delete nr=%ld\n",
262 shrinker->shrink, shrinker->nr); 277 shrinker->shrink, total_scan);
263 shrinker->nr = max_pass; 278 total_scan = max_pass;
264 } 279 }
265 280
266 /* 281 /*
282 * We need to avoid excessive windup on filesystem shrinkers
283 * due to large numbers of GFP_NOFS allocations causing the
284 * shrinkers to return -1 all the time. This results in a large
285 * nr being built up so when a shrink that can do some work
286 * comes along it empties the entire cache due to nr >>>
287 * max_pass. This is bad for sustaining a working set in
288 * memory.
289 *
290 * Hence only allow the shrinker to scan the entire cache when
291 * a large delta change is calculated directly.
292 */
293 if (delta < max_pass / 4)
294 total_scan = min(total_scan, max_pass / 2);
295
296 /*
267 * Avoid risking looping forever due to too large nr value: 297 * Avoid risking looping forever due to too large nr value:
268 * never try to free more than twice the estimate number of 298 * never try to free more than twice the estimate number of
269 * freeable entries. 299 * freeable entries.
270 */ 300 */
271 if (shrinker->nr > max_pass * 2) 301 if (total_scan > max_pass * 2)
272 shrinker->nr = max_pass * 2; 302 total_scan = max_pass * 2;
273 303
274 total_scan = shrinker->nr; 304 trace_mm_shrink_slab_start(shrinker, shrink, nr,
275 shrinker->nr = 0; 305 nr_pages_scanned, lru_pages,
306 max_pass, delta, total_scan);
276 307
277 while (total_scan >= SHRINK_BATCH) { 308 while (total_scan >= batch_size) {
278 long this_scan = SHRINK_BATCH;
279 int shrink_ret;
280 int nr_before; 309 int nr_before;
281 310
282 nr_before = do_shrinker_shrink(shrinker, shrink, 0); 311 nr_before = do_shrinker_shrink(shrinker, shrink, 0);
283 shrink_ret = do_shrinker_shrink(shrinker, shrink, 312 shrink_ret = do_shrinker_shrink(shrinker, shrink,
284 this_scan); 313 batch_size);
285 if (shrink_ret == -1) 314 if (shrink_ret == -1)
286 break; 315 break;
287 if (shrink_ret < nr_before) 316 if (shrink_ret < nr_before)
288 ret += nr_before - shrink_ret; 317 ret += nr_before - shrink_ret;
289 count_vm_events(SLABS_SCANNED, this_scan); 318 count_vm_events(SLABS_SCANNED, batch_size);
290 total_scan -= this_scan; 319 total_scan -= batch_size;
291 320
292 cond_resched(); 321 cond_resched();
293 } 322 }
294 323
295 shrinker->nr += total_scan; 324 /*
325 * move the unused scan count back into the shrinker in a
326 * manner that handles concurrent updates. If we exhausted the
327 * scan, there is no need to do an update.
328 */
329 do {
330 nr = shrinker->nr;
331 new_nr = total_scan + nr;
332 if (total_scan <= 0)
333 break;
334 } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
335
336 trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
296 } 337 }
297 up_read(&shrinker_rwsem); 338 up_read(&shrinker_rwsem);
298out: 339out:
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7389b7da3a8d..c50818f0473b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -97,8 +97,7 @@ static int
97rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) 97rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
98{ 98{
99 static uint32_t clntid; 99 static uint32_t clntid;
100 struct nameidata nd; 100 struct path path, dir;
101 struct path path;
102 char name[15]; 101 char name[15];
103 struct qstr q = { 102 struct qstr q = {
104 .name = name, 103 .name = name,
@@ -113,7 +112,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
113 path.mnt = rpc_get_mount(); 112 path.mnt = rpc_get_mount();
114 if (IS_ERR(path.mnt)) 113 if (IS_ERR(path.mnt))
115 return PTR_ERR(path.mnt); 114 return PTR_ERR(path.mnt);
116 error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); 115 error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &dir);
117 if (error) 116 if (error)
118 goto err; 117 goto err;
119 118
@@ -121,7 +120,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
121 q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); 120 q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++);
122 name[sizeof(name) - 1] = '\0'; 121 name[sizeof(name) - 1] = '\0';
123 q.hash = full_name_hash(q.name, q.len); 122 q.hash = full_name_hash(q.name, q.len);
124 path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); 123 path.dentry = rpc_create_client_dir(dir.dentry, &q, clnt);
125 if (!IS_ERR(path.dentry)) 124 if (!IS_ERR(path.dentry))
126 break; 125 break;
127 error = PTR_ERR(path.dentry); 126 error = PTR_ERR(path.dentry);
@@ -132,11 +131,11 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
132 goto err_path_put; 131 goto err_path_put;
133 } 132 }
134 } 133 }
135 path_put(&nd.path); 134 path_put(&dir);
136 clnt->cl_path = path; 135 clnt->cl_path = path;
137 return 0; 136 return 0;
138err_path_put: 137err_path_put:
139 path_put(&nd.path); 138 path_put(&dir);
140err: 139err:
141 rpc_put_mount(); 140 rpc_put_mount();
142 return error; 141 return error;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0722a25a3a33..ec68e1c05b85 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -808,8 +808,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
808 struct net *net = sock_net(sk); 808 struct net *net = sock_net(sk);
809 struct unix_sock *u = unix_sk(sk); 809 struct unix_sock *u = unix_sk(sk);
810 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr; 810 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
811 char *sun_path = sunaddr->sun_path;
811 struct dentry *dentry = NULL; 812 struct dentry *dentry = NULL;
812 struct nameidata nd; 813 struct path path;
813 int err; 814 int err;
814 unsigned hash; 815 unsigned hash;
815 struct unix_address *addr; 816 struct unix_address *addr;
@@ -845,48 +846,44 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
845 addr->hash = hash ^ sk->sk_type; 846 addr->hash = hash ^ sk->sk_type;
846 atomic_set(&addr->refcnt, 1); 847 atomic_set(&addr->refcnt, 1);
847 848
848 if (sunaddr->sun_path[0]) { 849 if (sun_path[0]) {
849 unsigned int mode; 850 unsigned int mode;
850 err = 0; 851 err = 0;
851 /* 852 /*
852 * Get the parent directory, calculate the hash for last 853 * Get the parent directory, calculate the hash for last
853 * component. 854 * component.
854 */ 855 */
855 err = kern_path_parent(sunaddr->sun_path, &nd); 856 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
856 if (err)
857 goto out_mknod_parent;
858
859 dentry = lookup_create(&nd, 0);
860 err = PTR_ERR(dentry); 857 err = PTR_ERR(dentry);
861 if (IS_ERR(dentry)) 858 if (IS_ERR(dentry))
862 goto out_mknod_unlock; 859 goto out_mknod_parent;
863 860
864 /* 861 /*
865 * All right, let's create it. 862 * All right, let's create it.
866 */ 863 */
867 mode = S_IFSOCK | 864 mode = S_IFSOCK |
868 (SOCK_INODE(sock)->i_mode & ~current_umask()); 865 (SOCK_INODE(sock)->i_mode & ~current_umask());
869 err = mnt_want_write(nd.path.mnt); 866 err = mnt_want_write(path.mnt);
870 if (err) 867 if (err)
871 goto out_mknod_dput; 868 goto out_mknod_dput;
872 err = security_path_mknod(&nd.path, dentry, mode, 0); 869 err = security_path_mknod(&path, dentry, mode, 0);
873 if (err) 870 if (err)
874 goto out_mknod_drop_write; 871 goto out_mknod_drop_write;
875 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0); 872 err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
876out_mknod_drop_write: 873out_mknod_drop_write:
877 mnt_drop_write(nd.path.mnt); 874 mnt_drop_write(path.mnt);
878 if (err) 875 if (err)
879 goto out_mknod_dput; 876 goto out_mknod_dput;
880 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 877 mutex_unlock(&path.dentry->d_inode->i_mutex);
881 dput(nd.path.dentry); 878 dput(path.dentry);
882 nd.path.dentry = dentry; 879 path.dentry = dentry;
883 880
884 addr->hash = UNIX_HASH_SIZE; 881 addr->hash = UNIX_HASH_SIZE;
885 } 882 }
886 883
887 spin_lock(&unix_table_lock); 884 spin_lock(&unix_table_lock);
888 885
889 if (!sunaddr->sun_path[0]) { 886 if (!sun_path[0]) {
890 err = -EADDRINUSE; 887 err = -EADDRINUSE;
891 if (__unix_find_socket_byname(net, sunaddr, addr_len, 888 if (__unix_find_socket_byname(net, sunaddr, addr_len,
892 sk->sk_type, hash)) { 889 sk->sk_type, hash)) {
@@ -897,8 +894,8 @@ out_mknod_drop_write:
897 list = &unix_socket_table[addr->hash]; 894 list = &unix_socket_table[addr->hash];
898 } else { 895 } else {
899 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)]; 896 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
900 u->dentry = nd.path.dentry; 897 u->dentry = path.dentry;
901 u->mnt = nd.path.mnt; 898 u->mnt = path.mnt;
902 } 899 }
903 900
904 err = 0; 901 err = 0;
@@ -915,9 +912,8 @@ out:
915 912
916out_mknod_dput: 913out_mknod_dput:
917 dput(dentry); 914 dput(dentry);
918out_mknod_unlock: 915 mutex_unlock(&path.dentry->d_inode->i_mutex);
919 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 916 path_put(&path);
920 path_put(&nd.path);
921out_mknod_parent: 917out_mknod_parent:
922 if (err == -EEXIST) 918 if (err == -EEXIST)
923 err = -EADDRINUSE; 919 err = -EADDRINUSE;
diff --git a/security/capability.c b/security/capability.c
index bbb51156261b..2984ea4f776f 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -181,7 +181,7 @@ static int cap_inode_follow_link(struct dentry *dentry,
181 return 0; 181 return 0;
182} 182}
183 183
184static int cap_inode_permission(struct inode *inode, int mask, unsigned flags) 184static int cap_inode_permission(struct inode *inode, int mask)
185{ 185{
186 return 0; 186 return 0;
187} 187}
diff --git a/security/security.c b/security/security.c
index 4ba6d4cc061f..0e4fccfef12c 100644
--- a/security/security.c
+++ b/security/security.c
@@ -518,14 +518,7 @@ int security_inode_permission(struct inode *inode, int mask)
518{ 518{
519 if (unlikely(IS_PRIVATE(inode))) 519 if (unlikely(IS_PRIVATE(inode)))
520 return 0; 520 return 0;
521 return security_ops->inode_permission(inode, mask, 0); 521 return security_ops->inode_permission(inode, mask);
522}
523
524int security_inode_exec_permission(struct inode *inode, unsigned int flags)
525{
526 if (unlikely(IS_PRIVATE(inode)))
527 return 0;
528 return security_ops->inode_permission(inode, MAY_EXEC, flags);
529} 522}
530 523
531int security_inode_setattr(struct dentry *dentry, struct iattr *attr) 524int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index d515b2128a4e..dca1c22d9276 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -527,7 +527,7 @@ int avc_audit(u32 ssid, u32 tsid,
527 * happened a little later. 527 * happened a little later.
528 */ 528 */
529 if ((a->type == LSM_AUDIT_DATA_INODE) && 529 if ((a->type == LSM_AUDIT_DATA_INODE) &&
530 (flags & IPERM_FLAG_RCU)) 530 (flags & MAY_NOT_BLOCK))
531 return -ECHILD; 531 return -ECHILD;
532 532
533 a->selinux_audit_data.tclass = tclass; 533 a->selinux_audit_data.tclass = tclass;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 422515509f3d..9f4c77dca35f 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2659,12 +2659,13 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na
2659 return dentry_has_perm(cred, dentry, FILE__READ); 2659 return dentry_has_perm(cred, dentry, FILE__READ);
2660} 2660}
2661 2661
2662static int selinux_inode_permission(struct inode *inode, int mask, unsigned flags) 2662static int selinux_inode_permission(struct inode *inode, int mask)
2663{ 2663{
2664 const struct cred *cred = current_cred(); 2664 const struct cred *cred = current_cred();
2665 struct common_audit_data ad; 2665 struct common_audit_data ad;
2666 u32 perms; 2666 u32 perms;
2667 bool from_access; 2667 bool from_access;
2668 unsigned flags = mask & MAY_NOT_BLOCK;
2668 2669
2669 from_access = mask & MAY_ACCESS; 2670 from_access = mask & MAY_ACCESS;
2670 mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND); 2671 mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 9831a39c11f6..f375eb2e1957 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -689,9 +689,10 @@ static int smack_inode_rename(struct inode *old_inode,
689 * 689 *
690 * Returns 0 if access is permitted, -EACCES otherwise 690 * Returns 0 if access is permitted, -EACCES otherwise
691 */ 691 */
692static int smack_inode_permission(struct inode *inode, int mask, unsigned flags) 692static int smack_inode_permission(struct inode *inode, int mask)
693{ 693{
694 struct smk_audit_info ad; 694 struct smk_audit_info ad;
695 int no_block = mask & MAY_NOT_BLOCK;
695 696
696 mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND); 697 mask &= (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND);
697 /* 698 /*
@@ -701,7 +702,7 @@ static int smack_inode_permission(struct inode *inode, int mask, unsigned flags)
701 return 0; 702 return 0;
702 703
703 /* May be droppable after audit */ 704 /* May be droppable after audit */
704 if (flags & IPERM_FLAG_RCU) 705 if (no_block)
705 return -ECHILD; 706 return -ECHILD;
706 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_INODE); 707 smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_INODE);
707 smk_ad_setfield_u_fs_inode(&ad, inode); 708 smk_ad_setfield_u_fs_inode(&ad, inode);
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index d1e05b047715..8d95e91c9fc4 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -103,7 +103,7 @@ char *tomoyo_realpath_from_path(struct path *path)
103 if (!buf) 103 if (!buf)
104 break; 104 break;
105 /* Get better name for socket. */ 105 /* Get better name for socket. */
106 if (dentry->d_sb && dentry->d_sb->s_magic == SOCKFS_MAGIC) { 106 if (dentry->d_sb->s_magic == SOCKFS_MAGIC) {
107 struct inode *inode = dentry->d_inode; 107 struct inode *inode = dentry->d_inode;
108 struct socket *sock = inode ? SOCKET_I(inode) : NULL; 108 struct socket *sock = inode ? SOCKET_I(inode) : NULL;
109 struct sock *sk = sock ? sock->sk : NULL; 109 struct sock *sk = sock ? sock->sk : NULL;