aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2012-08-01 06:40:02 -0400
committerNeilBrown <neilb@suse.de>2012-08-01 06:40:02 -0400
commitbb181e2e48f8c85db08c9cb015cbba9618dbf05c (patch)
tree191bc24dd97bcb174535cc217af082f16da3b43d /fs
parentd57368afe63b3b7b45ce6c2b8c5276417935be2f (diff)
parentc039c332f23e794deb6d6f37b9f07ff3b27fb2cf (diff)
Merge commit 'c039c332f23e794deb6d6f37b9f07ff3b27fb2cf' into md
Pull in pre-requisites for adding raid10 support to dm-raid.
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/vfs_dentry.c4
-rw-r--r--fs/9p/vfs_inode.c170
-rw-r--r--fs/9p/vfs_inode_dotl.c59
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/super.c1
-rw-r--r--fs/affs/affs.h11
-rw-r--r--fs/affs/amigaffs.c22
-rw-r--r--fs/affs/bitmap.c4
-rw-r--r--fs/affs/namei.c4
-rw-r--r--fs/affs/super.c68
-rw-r--r--fs/afs/dir.c14
-rw-r--r--fs/afs/mntpt.c4
-rw-r--r--fs/afs/super.c3
-rw-r--r--fs/aio.c73
-rw-r--r--fs/attr.c3
-rw-r--r--fs/autofs4/dev-ioctl.c4
-rw-r--r--fs/autofs4/root.c4
-rw-r--r--fs/bad_inode.c4
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/bfs/dir.c4
-rw-r--r--fs/block_dev.c36
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/inode.c6
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/cachefiles/namei.c2
-rw-r--r--fs/cachefiles/rdwr.c8
-rw-r--r--fs/ceph/dir.c77
-rw-r--r--fs/ceph/file.c26
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/super.h6
-rw-r--r--fs/cifs/cifsfs.c11
-rw-r--r--fs/cifs/cifsfs.h7
-rw-r--r--fs/cifs/cifssmb.c30
-rw-r--r--fs/cifs/connect.c18
-rw-r--r--fs/cifs/dir.c448
-rw-r--r--fs/cifs/inode.c5
-rw-r--r--fs/cifs/readdir.c7
-rw-r--r--fs/cifs/transport.c26
-rw-r--r--fs/coda/cache.c10
-rw-r--r--fs/coda/dir.c14
-rw-r--r--fs/configfs/dir.c2
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/dcache.c44
-rw-r--r--fs/debugfs/inode.c91
-rw-r--r--fs/devpts/inode.c6
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/dentry.c20
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h14
-rw-r--r--fs/ecryptfs/inode.c9
-rw-r--r--fs/ecryptfs/kthread.c73
-rw-r--r--fs/ecryptfs/main.c8
-rw-r--r--fs/efs/efs.h2
-rw-r--r--fs/efs/namei.c3
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exofs/namei.c4
-rw-r--r--fs/exofs/ore.c8
-rw-r--r--fs/exofs/ore_raid.c91
-rw-r--r--fs/exportfs/expfs.c16
-rw-r--r--fs/ext2/namei.c8
-rw-r--r--fs/ext2/super.c12
-rw-r--r--fs/ext3/dir.c3
-rw-r--r--fs/ext3/fsync.c9
-rw-r--r--fs/ext3/namei.c8
-rw-r--r--fs/ext3/super.c8
-rw-r--r--fs/ext4/dir.c75
-rw-r--r--fs/ext4/file.c9
-rw-r--r--fs/ext4/fsync.c11
-rw-r--r--fs/ext4/ioctl.c5
-rw-r--r--fs/ext4/namei.c8
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/fat/namei_msdos.c4
-rw-r--r--fs/fat/namei_vfat.c16
-rw-r--r--fs/fifo.c9
-rw-r--r--fs/file_table.c81
-rw-r--r--fs/freevxfs/vxfs_lookup.c4
-rw-r--r--fs/fs-writeback.c5
-rw-r--r--fs/fs_struct.c32
-rw-r--r--fs/fuse/dir.c99
-rw-r--r--fs/gfs2/aops.c18
-rw-r--r--fs/gfs2/bmap.c21
-rw-r--r--fs/gfs2/dentry.c6
-rw-r--r--fs/gfs2/dir.c9
-rw-r--r--fs/gfs2/file.c65
-rw-r--r--fs/gfs2/glock.c39
-rw-r--r--fs/gfs2/incore.h54
-rw-r--r--fs/gfs2/inode.c101
-rw-r--r--fs/gfs2/lops.c9
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c5
-rw-r--r--fs/gfs2/ops_fstype.c41
-rw-r--r--fs/gfs2/quota.c76
-rw-r--r--fs/gfs2/quota.h2
-rw-r--r--fs/gfs2/rgrp.c833
-rw-r--r--fs/gfs2/rgrp.h45
-rw-r--r--fs/gfs2/super.c32
-rw-r--r--fs/gfs2/sys.c23
-rw-r--r--fs/gfs2/trace_gfs2.h59
-rw-r--r--fs/gfs2/trans.h2
-rw-r--r--fs/gfs2/util.h18
-rw-r--r--fs/gfs2/xattr.c30
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/extent.c2
-rw-r--r--fs/hfs/hfs_fs.h15
-rw-r--r--fs/hfs/inode.c16
-rw-r--r--fs/hfs/mdb.c13
-rw-r--r--fs/hfs/super.c73
-rw-r--r--fs/hfs/sysdep.c4
-rw-r--r--fs/hfsplus/bitmap.c4
-rw-r--r--fs/hfsplus/dir.c6
-rw-r--r--fs/hfsplus/hfsplus_fs.h7
-rw-r--r--fs/hfsplus/inode.c8
-rw-r--r--fs/hfsplus/super.c46
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/hpfs/dir.c2
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hppfs/hppfs.c22
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c2
-rw-r--r--fs/internal.h10
-rw-r--r--fs/isofs/export.c1
-rw-r--r--fs/isofs/isofs.h2
-rw-r--r--fs/isofs/namei.c2
-rw-r--r--fs/jbd/recovery.c7
-rw-r--r--fs/jffs2/dir.c16
-rw-r--r--fs/jfs/namei.c18
-rw-r--r--fs/jfs/super.c5
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/locks.c6
-rw-r--r--fs/logfs/dir.c4
-rw-r--r--fs/logfs/super.c3
-rw-r--r--fs/minix/namei.c4
-rw-r--r--fs/mount.h13
-rw-r--r--fs/namei.c808
-rw-r--r--fs/namespace.c195
-rw-r--r--fs/ncpfs/dir.c14
-rw-r--r--fs/nfs/dir.c314
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/nfs3proc.c2
-rw-r--r--fs/nfs/nfs4proc.c37
-rw-r--r--fs/nfs/objlayout/objio_osd.c25
-rw-r--r--fs/nfs/proc.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/vfs.c14
-rw-r--r--fs/nilfs2/namei.c4
-rw-r--r--fs/nilfs2/super.c4
-rw-r--r--fs/notify/fanotify/fanotify_user.c8
-rw-r--r--fs/notify/fsnotify.c3
-rw-r--r--fs/ntfs/namei.c2
-rw-r--r--fs/ocfs2/dcache.c22
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c2
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/omfs/dir.c4
-rw-r--r--fs/open.c211
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/pnode.c5
-rw-r--r--fs/proc/base.c51
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/internal.h6
-rw-r--r--fs/proc/namespaces.c4
-rw-r--r--fs/proc/proc_devtree.c5
-rw-r--r--fs/proc/proc_net.c2
-rw-r--r--fs/proc/proc_sysctl.c6
-rw-r--r--fs/proc/root.c10
-rw-r--r--fs/proc_namespace.c7
-rw-r--r--fs/qnx4/namei.c2
-rw-r--r--fs/qnx4/qnx4.h2
-rw-r--r--fs/qnx6/inode.c1
-rw-r--r--fs/qnx6/namei.c2
-rw-r--r--fs/qnx6/qnx6.h2
-rw-r--r--fs/quota/dquot.c26
-rw-r--r--fs/quota/quota.c6
-rw-r--r--fs/ramfs/inode.c2
-rw-r--r--fs/read_write.c18
-rw-r--r--fs/reiserfs/namei.c12
-rw-r--r--fs/reiserfs/procfs.c2
-rw-r--r--fs/reiserfs/super.c5
-rw-r--r--fs/reiserfs/xattr.c4
-rw-r--r--fs/romfs/super.c2
-rw-r--r--fs/seq_file.c18
-rw-r--r--fs/squashfs/namei.c2
-rw-r--r--fs/super.c22
-rw-r--r--fs/sync.c63
-rw-r--r--fs/sysfs/dir.c31
-rw-r--r--fs/sysfs/mount.c4
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/sysv/inode.c18
-rw-r--r--fs/sysv/namei.c4
-rw-r--r--fs/sysv/sysv.h1
-rw-r--r--fs/ubifs/debug.c11
-rw-r--r--fs/ubifs/debug.h5
-rw-r--r--fs/ubifs/dir.c6
-rw-r--r--fs/ubifs/orphan.c4
-rw-r--r--fs/ubifs/replay.c20
-rw-r--r--fs/ubifs/sb.c8
-rw-r--r--fs/ubifs/super.c3
-rw-r--r--fs/udf/inode.c4
-rw-r--r--fs/udf/namei.c5
-rw-r--r--fs/udf/super.c130
-rw-r--r--fs/udf/truncate.c4
-rw-r--r--fs/udf/udfdecl.h1
-rw-r--r--fs/ufs/balloc.c8
-rw-r--r--fs/ufs/ialloc.c4
-rw-r--r--fs/ufs/namei.c4
-rw-r--r--fs/ufs/super.c148
-rw-r--r--fs/ufs/ufs.h5
-rw-r--r--fs/ufs/ufs_fs.h1
-rw-r--r--fs/xfs/xfs_alloc.c19
-rw-r--r--fs/xfs/xfs_buf.c53
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_ioctl.c7
-rw-r--r--fs/xfs/xfs_iops.c6
216 files changed, 3715 insertions, 2621 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index e78956cbd702..34c59f14a1c9 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -144,7 +144,7 @@ extern void v9fs_session_close(struct v9fs_session_info *v9ses);
144extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); 144extern void v9fs_session_cancel(struct v9fs_session_info *v9ses);
145extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); 145extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
146extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, 146extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
147 struct nameidata *nameidata); 147 unsigned int flags);
148extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); 148extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
149extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); 149extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
150extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 150extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index d529437ff442..64600b5d0522 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -100,13 +100,13 @@ static void v9fs_dentry_release(struct dentry *dentry)
100 } 100 }
101} 101}
102 102
103static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) 103static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
104{ 104{
105 struct p9_fid *fid; 105 struct p9_fid *fid;
106 struct inode *inode; 106 struct inode *inode;
107 struct v9fs_inode *v9inode; 107 struct v9fs_inode *v9inode;
108 108
109 if (nd->flags & LOOKUP_RCU) 109 if (flags & LOOKUP_RCU)
110 return -ECHILD; 110 return -ECHILD;
111 111
112 inode = dentry->d_inode; 112 inode = dentry->d_inode;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 57ccb7537dae..cbf9dbb1b2a2 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -712,88 +712,34 @@ error:
712} 712}
713 713
714/** 714/**
715 * v9fs_vfs_create - VFS hook to create files 715 * v9fs_vfs_create - VFS hook to create a regular file
716 *
717 * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open(). This is only called
718 * for mknod(2).
719 *
716 * @dir: directory inode that is being created 720 * @dir: directory inode that is being created
717 * @dentry: dentry that is being deleted 721 * @dentry: dentry that is being deleted
718 * @mode: create permissions 722 * @mode: create permissions
719 * @nd: path information
720 * 723 *
721 */ 724 */
722 725
723static int 726static int
724v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 727v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
725 struct nameidata *nd) 728 bool excl)
726{ 729{
727 int err; 730 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
728 u32 perm; 731 u32 perm = unixmode2p9mode(v9ses, mode);
729 int flags; 732 struct p9_fid *fid;
730 struct file *filp;
731 struct v9fs_inode *v9inode;
732 struct v9fs_session_info *v9ses;
733 struct p9_fid *fid, *inode_fid;
734
735 err = 0;
736 fid = NULL;
737 v9ses = v9fs_inode2v9ses(dir);
738 perm = unixmode2p9mode(v9ses, mode);
739 if (nd)
740 flags = nd->intent.open.flags;
741 else
742 flags = O_RDWR;
743 733
744 fid = v9fs_create(v9ses, dir, dentry, NULL, perm, 734 /* P9_OEXCL? */
745 v9fs_uflags2omode(flags, 735 fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR);
746 v9fs_proto_dotu(v9ses))); 736 if (IS_ERR(fid))
747 if (IS_ERR(fid)) { 737 return PTR_ERR(fid);
748 err = PTR_ERR(fid);
749 fid = NULL;
750 goto error;
751 }
752 738
753 v9fs_invalidate_inode_attr(dir); 739 v9fs_invalidate_inode_attr(dir);
754 /* if we are opening a file, assign the open fid to the file */ 740 p9_client_clunk(fid);
755 if (nd) {
756 v9inode = V9FS_I(dentry->d_inode);
757 mutex_lock(&v9inode->v_mutex);
758 if (v9ses->cache && !v9inode->writeback_fid &&
759 ((flags & O_ACCMODE) != O_RDONLY)) {
760 /*
761 * clone a fid and add it to writeback_fid
762 * we do it during open time instead of
763 * page dirty time via write_begin/page_mkwrite
764 * because we want write after unlink usecase
765 * to work.
766 */
767 inode_fid = v9fs_writeback_fid(dentry);
768 if (IS_ERR(inode_fid)) {
769 err = PTR_ERR(inode_fid);
770 mutex_unlock(&v9inode->v_mutex);
771 goto error;
772 }
773 v9inode->writeback_fid = (void *) inode_fid;
774 }
775 mutex_unlock(&v9inode->v_mutex);
776 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
777 if (IS_ERR(filp)) {
778 err = PTR_ERR(filp);
779 goto error;
780 }
781
782 filp->private_data = fid;
783#ifdef CONFIG_9P_FSCACHE
784 if (v9ses->cache)
785 v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
786#endif
787 } else
788 p9_client_clunk(fid);
789 741
790 return 0; 742 return 0;
791
792error:
793 if (fid)
794 p9_client_clunk(fid);
795
796 return err;
797} 743}
798 744
799/** 745/**
@@ -839,7 +785,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
839 */ 785 */
840 786
841struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, 787struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
842 struct nameidata *nameidata) 788 unsigned int flags)
843{ 789{
844 struct dentry *res; 790 struct dentry *res;
845 struct super_block *sb; 791 struct super_block *sb;
@@ -849,8 +795,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
849 char *name; 795 char *name;
850 int result = 0; 796 int result = 0;
851 797
852 p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", 798 p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n",
853 dir, dentry->d_name.name, dentry, nameidata); 799 dir, dentry->d_name.name, dentry, flags);
854 800
855 if (dentry->d_name.len > NAME_MAX) 801 if (dentry->d_name.len > NAME_MAX)
856 return ERR_PTR(-ENAMETOOLONG); 802 return ERR_PTR(-ENAMETOOLONG);
@@ -910,6 +856,86 @@ error:
910 return ERR_PTR(result); 856 return ERR_PTR(result);
911} 857}
912 858
859static int
860v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
861 struct file *file, unsigned flags, umode_t mode,
862 int *opened)
863{
864 int err;
865 u32 perm;
866 struct v9fs_inode *v9inode;
867 struct v9fs_session_info *v9ses;
868 struct p9_fid *fid, *inode_fid;
869 struct dentry *res = NULL;
870
871 if (d_unhashed(dentry)) {
872 res = v9fs_vfs_lookup(dir, dentry, 0);
873 if (IS_ERR(res))
874 return PTR_ERR(res);
875
876 if (res)
877 dentry = res;
878 }
879
880 /* Only creates */
881 if (!(flags & O_CREAT) || dentry->d_inode)
882 return finish_no_open(file, res);
883
884 err = 0;
885 fid = NULL;
886 v9ses = v9fs_inode2v9ses(dir);
887 perm = unixmode2p9mode(v9ses, mode);
888 fid = v9fs_create(v9ses, dir, dentry, NULL, perm,
889 v9fs_uflags2omode(flags,
890 v9fs_proto_dotu(v9ses)));
891 if (IS_ERR(fid)) {
892 err = PTR_ERR(fid);
893 fid = NULL;
894 goto error;
895 }
896
897 v9fs_invalidate_inode_attr(dir);
898 v9inode = V9FS_I(dentry->d_inode);
899 mutex_lock(&v9inode->v_mutex);
900 if (v9ses->cache && !v9inode->writeback_fid &&
901 ((flags & O_ACCMODE) != O_RDONLY)) {
902 /*
903 * clone a fid and add it to writeback_fid
904 * we do it during open time instead of
905 * page dirty time via write_begin/page_mkwrite
906 * because we want write after unlink usecase
907 * to work.
908 */
909 inode_fid = v9fs_writeback_fid(dentry);
910 if (IS_ERR(inode_fid)) {
911 err = PTR_ERR(inode_fid);
912 mutex_unlock(&v9inode->v_mutex);
913 goto error;
914 }
915 v9inode->writeback_fid = (void *) inode_fid;
916 }
917 mutex_unlock(&v9inode->v_mutex);
918 err = finish_open(file, dentry, generic_file_open, opened);
919 if (err)
920 goto error;
921
922 file->private_data = fid;
923#ifdef CONFIG_9P_FSCACHE
924 if (v9ses->cache)
925 v9fs_cache_inode_set_cookie(dentry->d_inode, file);
926#endif
927
928 *opened |= FILE_CREATED;
929out:
930 dput(res);
931 return err;
932
933error:
934 if (fid)
935 p9_client_clunk(fid);
936 goto out;
937}
938
913/** 939/**
914 * v9fs_vfs_unlink - VFS unlink hook to delete an inode 940 * v9fs_vfs_unlink - VFS unlink hook to delete an inode
915 * @i: inode that is being unlinked 941 * @i: inode that is being unlinked
@@ -1488,6 +1514,7 @@ out:
1488static const struct inode_operations v9fs_dir_inode_operations_dotu = { 1514static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1489 .create = v9fs_vfs_create, 1515 .create = v9fs_vfs_create,
1490 .lookup = v9fs_vfs_lookup, 1516 .lookup = v9fs_vfs_lookup,
1517 .atomic_open = v9fs_vfs_atomic_open,
1491 .symlink = v9fs_vfs_symlink, 1518 .symlink = v9fs_vfs_symlink,
1492 .link = v9fs_vfs_link, 1519 .link = v9fs_vfs_link,
1493 .unlink = v9fs_vfs_unlink, 1520 .unlink = v9fs_vfs_unlink,
@@ -1502,6 +1529,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1502static const struct inode_operations v9fs_dir_inode_operations = { 1529static const struct inode_operations v9fs_dir_inode_operations = {
1503 .create = v9fs_vfs_create, 1530 .create = v9fs_vfs_create,
1504 .lookup = v9fs_vfs_lookup, 1531 .lookup = v9fs_vfs_lookup,
1532 .atomic_open = v9fs_vfs_atomic_open,
1505 .unlink = v9fs_vfs_unlink, 1533 .unlink = v9fs_vfs_unlink,
1506 .mkdir = v9fs_vfs_mkdir, 1534 .mkdir = v9fs_vfs_mkdir,
1507 .rmdir = v9fs_vfs_rmdir, 1535 .rmdir = v9fs_vfs_rmdir,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index e3dd2a1e2bfc..40895546e103 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -230,20 +230,25 @@ int v9fs_open_to_dotl_flags(int flags)
230 * @dir: directory inode that is being created 230 * @dir: directory inode that is being created
231 * @dentry: dentry that is being deleted 231 * @dentry: dentry that is being deleted
232 * @mode: create permissions 232 * @mode: create permissions
233 * @nd: path information
234 * 233 *
235 */ 234 */
236 235
237static int 236static int
238v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, 237v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
239 struct nameidata *nd) 238 bool excl)
239{
240 return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
241}
242
243static int
244v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
245 struct file *file, unsigned flags, umode_t omode,
246 int *opened)
240{ 247{
241 int err = 0; 248 int err = 0;
242 gid_t gid; 249 gid_t gid;
243 int flags;
244 umode_t mode; 250 umode_t mode;
245 char *name = NULL; 251 char *name = NULL;
246 struct file *filp;
247 struct p9_qid qid; 252 struct p9_qid qid;
248 struct inode *inode; 253 struct inode *inode;
249 struct p9_fid *fid = NULL; 254 struct p9_fid *fid = NULL;
@@ -251,19 +256,23 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
251 struct p9_fid *dfid, *ofid, *inode_fid; 256 struct p9_fid *dfid, *ofid, *inode_fid;
252 struct v9fs_session_info *v9ses; 257 struct v9fs_session_info *v9ses;
253 struct posix_acl *pacl = NULL, *dacl = NULL; 258 struct posix_acl *pacl = NULL, *dacl = NULL;
259 struct dentry *res = NULL;
254 260
255 v9ses = v9fs_inode2v9ses(dir); 261 if (d_unhashed(dentry)) {
256 if (nd) 262 res = v9fs_vfs_lookup(dir, dentry, 0);
257 flags = nd->intent.open.flags; 263 if (IS_ERR(res))
258 else { 264 return PTR_ERR(res);
259 /* 265
260 * create call without LOOKUP_OPEN is due 266 if (res)
261 * to mknod of regular files. So use mknod 267 dentry = res;
262 * operation.
263 */
264 return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
265 } 268 }
266 269
270 /* Only creates */
271 if (!(flags & O_CREAT) || dentry->d_inode)
272 return finish_no_open(file, res);
273
274 v9ses = v9fs_inode2v9ses(dir);
275
267 name = (char *) dentry->d_name.name; 276 name = (char *) dentry->d_name.name;
268 p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", 277 p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n",
269 name, flags, omode); 278 name, flags, omode);
@@ -272,7 +281,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
272 if (IS_ERR(dfid)) { 281 if (IS_ERR(dfid)) {
273 err = PTR_ERR(dfid); 282 err = PTR_ERR(dfid);
274 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); 283 p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err);
275 return err; 284 goto out;
276 } 285 }
277 286
278 /* clone a fid to use for creation */ 287 /* clone a fid to use for creation */
@@ -280,7 +289,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
280 if (IS_ERR(ofid)) { 289 if (IS_ERR(ofid)) {
281 err = PTR_ERR(ofid); 290 err = PTR_ERR(ofid);
282 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); 291 p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
283 return err; 292 goto out;
284 } 293 }
285 294
286 gid = v9fs_get_fsgid_for_create(dir); 295 gid = v9fs_get_fsgid_for_create(dir);
@@ -345,17 +354,18 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
345 } 354 }
346 mutex_unlock(&v9inode->v_mutex); 355 mutex_unlock(&v9inode->v_mutex);
347 /* Since we are opening a file, assign the open fid to the file */ 356 /* Since we are opening a file, assign the open fid to the file */
348 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 357 err = finish_open(file, dentry, generic_file_open, opened);
349 if (IS_ERR(filp)) { 358 if (err)
350 err = PTR_ERR(filp);
351 goto err_clunk_old_fid; 359 goto err_clunk_old_fid;
352 } 360 file->private_data = ofid;
353 filp->private_data = ofid;
354#ifdef CONFIG_9P_FSCACHE 361#ifdef CONFIG_9P_FSCACHE
355 if (v9ses->cache) 362 if (v9ses->cache)
356 v9fs_cache_inode_set_cookie(inode, filp); 363 v9fs_cache_inode_set_cookie(inode, file);
357#endif 364#endif
358 return 0; 365 *opened |= FILE_CREATED;
366out:
367 dput(res);
368 return err;
359 369
360error: 370error:
361 if (fid) 371 if (fid)
@@ -364,7 +374,7 @@ err_clunk_old_fid:
364 if (ofid) 374 if (ofid)
365 p9_client_clunk(ofid); 375 p9_client_clunk(ofid);
366 v9fs_set_create_acl(NULL, &dacl, &pacl); 376 v9fs_set_create_acl(NULL, &dacl, &pacl);
367 return err; 377 goto out;
368} 378}
369 379
370/** 380/**
@@ -982,6 +992,7 @@ out:
982 992
983const struct inode_operations v9fs_dir_inode_operations_dotl = { 993const struct inode_operations v9fs_dir_inode_operations_dotl = {
984 .create = v9fs_vfs_create_dotl, 994 .create = v9fs_vfs_create_dotl,
995 .atomic_open = v9fs_vfs_atomic_open_dotl,
985 .lookup = v9fs_vfs_lookup, 996 .lookup = v9fs_vfs_lookup,
986 .link = v9fs_vfs_link_dotl, 997 .link = v9fs_vfs_link_dotl,
987 .symlink = v9fs_vfs_symlink_dotl, 998 .symlink = v9fs_vfs_symlink_dotl,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 8c92a9ba8330..137d50396898 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -89,7 +89,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
89 if (v9ses->cache) 89 if (v9ses->cache)
90 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE; 90 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
91 91
92 sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; 92 sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
93 if (!v9ses->cache) 93 if (!v9ses->cache)
94 sb->s_flags |= MS_SYNCHRONOUS; 94 sb->s_flags |= MS_SYNCHRONOUS;
95 95
@@ -137,7 +137,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
137 goto close_session; 137 goto close_session;
138 } 138 }
139 139
140 sb = sget(fs_type, NULL, v9fs_set_super, v9ses); 140 sb = sget(fs_type, NULL, v9fs_set_super, flags, v9ses);
141 if (IS_ERR(sb)) { 141 if (IS_ERR(sb)) {
142 retval = PTR_ERR(sb); 142 retval = PTR_ERR(sb);
143 goto clunk_fid; 143 goto clunk_fid;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 3d83075aaa2e..b3be2e7c5643 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -266,7 +266,7 @@ const struct dentry_operations adfs_dentry_operations = {
266}; 266};
267 267
268static struct dentry * 268static struct dentry *
269adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 269adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
270{ 270{
271 struct inode *inode = NULL; 271 struct inode *inode = NULL;
272 struct object_info obj; 272 struct object_info obj;
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 06fdcc9382c4..bdaec92353c2 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -246,7 +246,6 @@ static struct inode *adfs_alloc_inode(struct super_block *sb)
246static void adfs_i_callback(struct rcu_head *head) 246static void adfs_i_callback(struct rcu_head *head)
247{ 247{
248 struct inode *inode = container_of(head, struct inode, i_rcu); 248 struct inode *inode = container_of(head, struct inode, i_rcu);
249 INIT_LIST_HEAD(&inode->i_dentry);
250 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); 249 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
251} 250}
252 251
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1fceb320d2f2..6e216419f340 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -3,6 +3,7 @@
3#include <linux/buffer_head.h> 3#include <linux/buffer_head.h>
4#include <linux/amigaffs.h> 4#include <linux/amigaffs.h>
5#include <linux/mutex.h> 5#include <linux/mutex.h>
6#include <linux/workqueue.h>
6 7
7/* AmigaOS allows file names with up to 30 characters length. 8/* AmigaOS allows file names with up to 30 characters length.
8 * Names longer than that will be silently truncated. If you 9 * Names longer than that will be silently truncated. If you
@@ -100,6 +101,10 @@ struct affs_sb_info {
100 char *s_prefix; /* Prefix for volumes and assigns. */ 101 char *s_prefix; /* Prefix for volumes and assigns. */
101 char s_volume[32]; /* Volume prefix for absolute symlinks. */ 102 char s_volume[32]; /* Volume prefix for absolute symlinks. */
102 spinlock_t symlink_lock; /* protects the previous two */ 103 spinlock_t symlink_lock; /* protects the previous two */
104 struct super_block *sb; /* the VFS superblock object */
105 int work_queued; /* non-zero delayed work is queued */
106 struct delayed_work sb_work; /* superblock flush delayed work */
107 spinlock_t work_lock; /* protects sb_work and work_queued */
103}; 108};
104 109
105#define SF_INTL 0x0001 /* International filesystem. */ 110#define SF_INTL 0x0001 /* International filesystem. */
@@ -120,6 +125,8 @@ static inline struct affs_sb_info *AFFS_SB(struct super_block *sb)
120 return sb->s_fs_info; 125 return sb->s_fs_info;
121} 126}
122 127
128void affs_mark_sb_dirty(struct super_block *sb);
129
123/* amigaffs.c */ 130/* amigaffs.c */
124 131
125extern int affs_insert_hash(struct inode *inode, struct buffer_head *bh); 132extern int affs_insert_hash(struct inode *inode, struct buffer_head *bh);
@@ -146,9 +153,9 @@ extern void affs_free_bitmap(struct super_block *sb);
146/* namei.c */ 153/* namei.c */
147 154
148extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); 155extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
149extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); 156extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
150extern int affs_unlink(struct inode *dir, struct dentry *dentry); 157extern int affs_unlink(struct inode *dir, struct dentry *dentry);
151extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *); 158extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool);
152extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); 159extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
153extern int affs_rmdir(struct inode *dir, struct dentry *dentry); 160extern int affs_rmdir(struct inode *dir, struct dentry *dentry);
154extern int affs_link(struct dentry *olddentry, struct inode *dir, 161extern int affs_link(struct dentry *olddentry, struct inode *dir,
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 52a6407682e6..eb82ee53ee0b 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -122,22 +122,16 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
122} 122}
123 123
124static void 124static void
125affs_fix_dcache(struct dentry *dentry, u32 entry_ino) 125affs_fix_dcache(struct inode *inode, u32 entry_ino)
126{ 126{
127 struct inode *inode = dentry->d_inode; 127 struct dentry *dentry;
128 void *data = dentry->d_fsdata; 128 struct hlist_node *p;
129 struct list_head *head, *next;
130
131 spin_lock(&inode->i_lock); 129 spin_lock(&inode->i_lock);
132 head = &inode->i_dentry; 130 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
133 next = head->next;
134 while (next != head) {
135 dentry = list_entry(next, struct dentry, d_alias);
136 if (entry_ino == (u32)(long)dentry->d_fsdata) { 131 if (entry_ino == (u32)(long)dentry->d_fsdata) {
137 dentry->d_fsdata = data; 132 dentry->d_fsdata = (void *)inode->i_ino;
138 break; 133 break;
139 } 134 }
140 next = next->next;
141 } 135 }
142 spin_unlock(&inode->i_lock); 136 spin_unlock(&inode->i_lock);
143} 137}
@@ -177,7 +171,11 @@ affs_remove_link(struct dentry *dentry)
177 } 171 }
178 172
179 affs_lock_dir(dir); 173 affs_lock_dir(dir);
180 affs_fix_dcache(dentry, link_ino); 174 /*
175 * if there's a dentry for that block, make it
176 * refer to inode itself.
177 */
178 affs_fix_dcache(inode, link_ino);
181 retval = affs_remove_hash(dir, link_bh); 179 retval = affs_remove_hash(dir, link_bh);
182 if (retval) { 180 if (retval) {
183 affs_unlock_dir(dir); 181 affs_unlock_dir(dir);
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 3e262711ae06..6e0be43ef6ef 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -103,7 +103,7 @@ affs_free_block(struct super_block *sb, u32 block)
103 *(__be32 *)bh->b_data = cpu_to_be32(tmp - mask); 103 *(__be32 *)bh->b_data = cpu_to_be32(tmp - mask);
104 104
105 mark_buffer_dirty(bh); 105 mark_buffer_dirty(bh);
106 sb->s_dirt = 1; 106 affs_mark_sb_dirty(sb);
107 bm->bm_free++; 107 bm->bm_free++;
108 108
109 mutex_unlock(&sbi->s_bmlock); 109 mutex_unlock(&sbi->s_bmlock);
@@ -248,7 +248,7 @@ find_bit:
248 *(__be32 *)bh->b_data = cpu_to_be32(tmp + mask); 248 *(__be32 *)bh->b_data = cpu_to_be32(tmp + mask);
249 249
250 mark_buffer_dirty(bh); 250 mark_buffer_dirty(bh);
251 sb->s_dirt = 1; 251 affs_mark_sb_dirty(sb);
252 252
253 mutex_unlock(&sbi->s_bmlock); 253 mutex_unlock(&sbi->s_bmlock);
254 254
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 47806940aac0..ff65884a7839 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -211,7 +211,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
211} 211}
212 212
213struct dentry * 213struct dentry *
214affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 214affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
215{ 215{
216 struct super_block *sb = dir->i_sb; 216 struct super_block *sb = dir->i_sb;
217 struct buffer_head *bh; 217 struct buffer_head *bh;
@@ -255,7 +255,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
255} 255}
256 256
257int 257int
258affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) 258affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
259{ 259{
260 struct super_block *sb = dir->i_sb; 260 struct super_block *sb = dir->i_sb;
261 struct inode *inode; 261 struct inode *inode;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 0782653a05a2..c70f1e5fc024 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -17,6 +17,7 @@
17#include <linux/magic.h> 17#include <linux/magic.h>
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/writeback.h>
20#include "affs.h" 21#include "affs.h"
21 22
22extern struct timezone sys_tz; 23extern struct timezone sys_tz;
@@ -25,15 +26,17 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
25static int affs_remount (struct super_block *sb, int *flags, char *data); 26static int affs_remount (struct super_block *sb, int *flags, char *data);
26 27
27static void 28static void
28affs_commit_super(struct super_block *sb, int wait, int clean) 29affs_commit_super(struct super_block *sb, int wait)
29{ 30{
30 struct affs_sb_info *sbi = AFFS_SB(sb); 31 struct affs_sb_info *sbi = AFFS_SB(sb);
31 struct buffer_head *bh = sbi->s_root_bh; 32 struct buffer_head *bh = sbi->s_root_bh;
32 struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); 33 struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
33 34
34 tail->bm_flag = cpu_to_be32(clean); 35 lock_buffer(bh);
35 secs_to_datestamp(get_seconds(), &tail->disk_change); 36 secs_to_datestamp(get_seconds(), &tail->disk_change);
36 affs_fix_checksum(sb, bh); 37 affs_fix_checksum(sb, bh);
38 unlock_buffer(bh);
39
37 mark_buffer_dirty(bh); 40 mark_buffer_dirty(bh);
38 if (wait) 41 if (wait)
39 sync_dirty_buffer(bh); 42 sync_dirty_buffer(bh);
@@ -45,9 +48,7 @@ affs_put_super(struct super_block *sb)
45 struct affs_sb_info *sbi = AFFS_SB(sb); 48 struct affs_sb_info *sbi = AFFS_SB(sb);
46 pr_debug("AFFS: put_super()\n"); 49 pr_debug("AFFS: put_super()\n");
47 50
48 if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) 51 cancel_delayed_work_sync(&sbi->sb_work);
49 affs_commit_super(sb, 1, 1);
50
51 kfree(sbi->s_prefix); 52 kfree(sbi->s_prefix);
52 affs_free_bitmap(sb); 53 affs_free_bitmap(sb);
53 affs_brelse(sbi->s_root_bh); 54 affs_brelse(sbi->s_root_bh);
@@ -55,26 +56,43 @@ affs_put_super(struct super_block *sb)
55 sb->s_fs_info = NULL; 56 sb->s_fs_info = NULL;
56} 57}
57 58
58static void 59static int
59affs_write_super(struct super_block *sb) 60affs_sync_fs(struct super_block *sb, int wait)
60{ 61{
61 lock_super(sb); 62 affs_commit_super(sb, wait);
62 if (!(sb->s_flags & MS_RDONLY)) 63 return 0;
63 affs_commit_super(sb, 1, 2); 64}
64 sb->s_dirt = 0; 65
65 unlock_super(sb); 66static void flush_superblock(struct work_struct *work)
67{
68 struct affs_sb_info *sbi;
69 struct super_block *sb;
70
71 sbi = container_of(work, struct affs_sb_info, sb_work.work);
72 sb = sbi->sb;
66 73
67 pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds()); 74 spin_lock(&sbi->work_lock);
75 sbi->work_queued = 0;
76 spin_unlock(&sbi->work_lock);
77
78 affs_commit_super(sb, 1);
68} 79}
69 80
70static int 81void affs_mark_sb_dirty(struct super_block *sb)
71affs_sync_fs(struct super_block *sb, int wait)
72{ 82{
73 lock_super(sb); 83 struct affs_sb_info *sbi = AFFS_SB(sb);
74 affs_commit_super(sb, wait, 2); 84 unsigned long delay;
75 sb->s_dirt = 0; 85
76 unlock_super(sb); 86 if (sb->s_flags & MS_RDONLY)
77 return 0; 87 return;
88
89 spin_lock(&sbi->work_lock);
90 if (!sbi->work_queued) {
91 delay = msecs_to_jiffies(dirty_writeback_interval * 10);
92 queue_delayed_work(system_long_wq, &sbi->sb_work, delay);
93 sbi->work_queued = 1;
94 }
95 spin_unlock(&sbi->work_lock);
78} 96}
79 97
80static struct kmem_cache * affs_inode_cachep; 98static struct kmem_cache * affs_inode_cachep;
@@ -138,7 +156,6 @@ static const struct super_operations affs_sops = {
138 .write_inode = affs_write_inode, 156 .write_inode = affs_write_inode,
139 .evict_inode = affs_evict_inode, 157 .evict_inode = affs_evict_inode,
140 .put_super = affs_put_super, 158 .put_super = affs_put_super,
141 .write_super = affs_write_super,
142 .sync_fs = affs_sync_fs, 159 .sync_fs = affs_sync_fs,
143 .statfs = affs_statfs, 160 .statfs = affs_statfs,
144 .remount_fs = affs_remount, 161 .remount_fs = affs_remount,
@@ -305,8 +322,11 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
305 return -ENOMEM; 322 return -ENOMEM;
306 323
307 sb->s_fs_info = sbi; 324 sb->s_fs_info = sbi;
325 sbi->sb = sb;
308 mutex_init(&sbi->s_bmlock); 326 mutex_init(&sbi->s_bmlock);
309 spin_lock_init(&sbi->symlink_lock); 327 spin_lock_init(&sbi->symlink_lock);
328 spin_lock_init(&sbi->work_lock);
329 INIT_DELAYED_WORK(&sbi->sb_work, flush_superblock);
310 330
311 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, 331 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
312 &blocksize,&sbi->s_prefix, 332 &blocksize,&sbi->s_prefix,
@@ -531,6 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
531 return -EINVAL; 551 return -EINVAL;
532 } 552 }
533 553
554 flush_delayed_work_sync(&sbi->sb_work);
534 replace_mount_options(sb, new_opts); 555 replace_mount_options(sb, new_opts);
535 556
536 sbi->s_flags = mount_flags; 557 sbi->s_flags = mount_flags;
@@ -549,10 +570,9 @@ affs_remount(struct super_block *sb, int *flags, char *data)
549 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 570 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
550 return 0; 571 return 0;
551 572
552 if (*flags & MS_RDONLY) { 573 if (*flags & MS_RDONLY)
553 affs_write_super(sb);
554 affs_free_bitmap(sb); 574 affs_free_bitmap(sb);
555 } else 575 else
556 res = affs_init_bitmap(sb, flags); 576 res = affs_init_bitmap(sb, flags);
557 577
558 return res; 578 return res;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e22dc4b4a503..db477906ba4f 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -20,16 +20,16 @@
20#include "internal.h" 20#include "internal.h"
21 21
22static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, 22static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
23 struct nameidata *nd); 23 unsigned int flags);
24static int afs_dir_open(struct inode *inode, struct file *file); 24static int afs_dir_open(struct inode *inode, struct file *file);
25static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); 25static int afs_readdir(struct file *file, void *dirent, filldir_t filldir);
26static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); 26static int afs_d_revalidate(struct dentry *dentry, unsigned int flags);
27static int afs_d_delete(const struct dentry *dentry); 27static int afs_d_delete(const struct dentry *dentry);
28static void afs_d_release(struct dentry *dentry); 28static void afs_d_release(struct dentry *dentry);
29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, 29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
30 loff_t fpos, u64 ino, unsigned dtype); 30 loff_t fpos, u64 ino, unsigned dtype);
31static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 31static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
32 struct nameidata *nd); 32 bool excl);
33static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); 33static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
34static int afs_rmdir(struct inode *dir, struct dentry *dentry); 34static int afs_rmdir(struct inode *dir, struct dentry *dentry);
35static int afs_unlink(struct inode *dir, struct dentry *dentry); 35static int afs_unlink(struct inode *dir, struct dentry *dentry);
@@ -516,7 +516,7 @@ out:
516 * look up an entry in a directory 516 * look up an entry in a directory
517 */ 517 */
518static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, 518static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
519 struct nameidata *nd) 519 unsigned int flags)
520{ 520{
521 struct afs_vnode *vnode; 521 struct afs_vnode *vnode;
522 struct afs_fid fid; 522 struct afs_fid fid;
@@ -598,7 +598,7 @@ success:
598 * - NOTE! the hit can be a negative hit too, so we can't assume we have an 598 * - NOTE! the hit can be a negative hit too, so we can't assume we have an
599 * inode 599 * inode
600 */ 600 */
601static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 601static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
602{ 602{
603 struct afs_vnode *vnode, *dir; 603 struct afs_vnode *vnode, *dir;
604 struct afs_fid uninitialized_var(fid); 604 struct afs_fid uninitialized_var(fid);
@@ -607,7 +607,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
607 void *dir_version; 607 void *dir_version;
608 int ret; 608 int ret;
609 609
610 if (nd->flags & LOOKUP_RCU) 610 if (flags & LOOKUP_RCU)
611 return -ECHILD; 611 return -ECHILD;
612 612
613 vnode = AFS_FS_I(dentry->d_inode); 613 vnode = AFS_FS_I(dentry->d_inode);
@@ -949,7 +949,7 @@ error:
949 * create a regular file on an AFS filesystem 949 * create a regular file on an AFS filesystem
950 */ 950 */
951static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 951static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
952 struct nameidata *nd) 952 bool excl)
953{ 953{
954 struct afs_file_status status; 954 struct afs_file_status status;
955 struct afs_callback cb; 955 struct afs_callback cb;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 298cf8919ec7..9682c33d5daf 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -22,7 +22,7 @@
22 22
23static struct dentry *afs_mntpt_lookup(struct inode *dir, 23static struct dentry *afs_mntpt_lookup(struct inode *dir,
24 struct dentry *dentry, 24 struct dentry *dentry,
25 struct nameidata *nd); 25 unsigned int flags);
26static int afs_mntpt_open(struct inode *inode, struct file *file); 26static int afs_mntpt_open(struct inode *inode, struct file *file);
27static void afs_mntpt_expiry_timed_out(struct work_struct *work); 27static void afs_mntpt_expiry_timed_out(struct work_struct *work);
28 28
@@ -104,7 +104,7 @@ out:
104 */ 104 */
105static struct dentry *afs_mntpt_lookup(struct inode *dir, 105static struct dentry *afs_mntpt_lookup(struct inode *dir,
106 struct dentry *dentry, 106 struct dentry *dentry,
107 struct nameidata *nd) 107 unsigned int flags)
108{ 108{
109 _enter("%p,%p{%p{%s},%s}", 109 _enter("%p,%p{%p{%s},%s}",
110 dir, 110 dir,
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f02b31e7e648..df8c6047c2a1 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -395,7 +395,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
395 as->volume = vol; 395 as->volume = vol;
396 396
397 /* allocate a deviceless superblock */ 397 /* allocate a deviceless superblock */
398 sb = sget(fs_type, afs_test_super, afs_set_super, as); 398 sb = sget(fs_type, afs_test_super, afs_set_super, flags, as);
399 if (IS_ERR(sb)) { 399 if (IS_ERR(sb)) {
400 ret = PTR_ERR(sb); 400 ret = PTR_ERR(sb);
401 afs_put_volume(vol); 401 afs_put_volume(vol);
@@ -406,7 +406,6 @@ static struct dentry *afs_mount(struct file_system_type *fs_type,
406 if (!sb->s_root) { 406 if (!sb->s_root) {
407 /* initial superblock/root creation */ 407 /* initial superblock/root creation */
408 _debug("create"); 408 _debug("create");
409 sb->s_flags = flags;
410 ret = afs_fill_super(sb, &params); 409 ret = afs_fill_super(sb, &params);
411 if (ret < 0) { 410 if (ret < 0) {
412 deactivate_locked_super(sb); 411 deactivate_locked_super(sb);
diff --git a/fs/aio.c b/fs/aio.c
index 55c4c7656053..71f613cf4a85 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -56,13 +56,6 @@ static struct kmem_cache *kioctx_cachep;
56 56
57static struct workqueue_struct *aio_wq; 57static struct workqueue_struct *aio_wq;
58 58
59/* Used for rare fput completion. */
60static void aio_fput_routine(struct work_struct *);
61static DECLARE_WORK(fput_work, aio_fput_routine);
62
63static DEFINE_SPINLOCK(fput_lock);
64static LIST_HEAD(fput_head);
65
66static void aio_kick_handler(struct work_struct *); 59static void aio_kick_handler(struct work_struct *);
67static void aio_queue_work(struct kioctx *); 60static void aio_queue_work(struct kioctx *);
68 61
@@ -479,7 +472,6 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
479{ 472{
480 unsigned short allocated, to_alloc; 473 unsigned short allocated, to_alloc;
481 long avail; 474 long avail;
482 bool called_fput = false;
483 struct kiocb *req, *n; 475 struct kiocb *req, *n;
484 struct aio_ring *ring; 476 struct aio_ring *ring;
485 477
@@ -495,28 +487,11 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
495 if (allocated == 0) 487 if (allocated == 0)
496 goto out; 488 goto out;
497 489
498retry:
499 spin_lock_irq(&ctx->ctx_lock); 490 spin_lock_irq(&ctx->ctx_lock);
500 ring = kmap_atomic(ctx->ring_info.ring_pages[0]); 491 ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
501 492
502 avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active; 493 avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
503 BUG_ON(avail < 0); 494 BUG_ON(avail < 0);
504 if (avail == 0 && !called_fput) {
505 /*
506 * Handle a potential starvation case. It is possible that
507 * we hold the last reference on a struct file, causing us
508 * to delay the final fput to non-irq context. In this case,
509 * ctx->reqs_active is artificially high. Calling the fput
510 * routine here may free up a slot in the event completion
511 * ring, allowing this allocation to succeed.
512 */
513 kunmap_atomic(ring);
514 spin_unlock_irq(&ctx->ctx_lock);
515 aio_fput_routine(NULL);
516 called_fput = true;
517 goto retry;
518 }
519
520 if (avail < allocated) { 495 if (avail < allocated) {
521 /* Trim back the number of requests. */ 496 /* Trim back the number of requests. */
522 list_for_each_entry_safe(req, n, &batch->head, ki_batch) { 497 list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
@@ -570,36 +545,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
570 wake_up_all(&ctx->wait); 545 wake_up_all(&ctx->wait);
571} 546}
572 547
573static void aio_fput_routine(struct work_struct *data)
574{
575 spin_lock_irq(&fput_lock);
576 while (likely(!list_empty(&fput_head))) {
577 struct kiocb *req = list_kiocb(fput_head.next);
578 struct kioctx *ctx = req->ki_ctx;
579
580 list_del(&req->ki_list);
581 spin_unlock_irq(&fput_lock);
582
583 /* Complete the fput(s) */
584 if (req->ki_filp != NULL)
585 fput(req->ki_filp);
586
587 /* Link the iocb into the context's free list */
588 rcu_read_lock();
589 spin_lock_irq(&ctx->ctx_lock);
590 really_put_req(ctx, req);
591 /*
592 * at that point ctx might've been killed, but actual
593 * freeing is RCU'd
594 */
595 spin_unlock_irq(&ctx->ctx_lock);
596 rcu_read_unlock();
597
598 spin_lock_irq(&fput_lock);
599 }
600 spin_unlock_irq(&fput_lock);
601}
602
603/* __aio_put_req 548/* __aio_put_req
604 * Returns true if this put was the last user of the request. 549 * Returns true if this put was the last user of the request.
605 */ 550 */
@@ -618,21 +563,9 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
618 req->ki_cancel = NULL; 563 req->ki_cancel = NULL;
619 req->ki_retry = NULL; 564 req->ki_retry = NULL;
620 565
621 /* 566 fput(req->ki_filp);
622 * Try to optimize the aio and eventfd file* puts, by avoiding to 567 req->ki_filp = NULL;
623 * schedule work in case it is not final fput() time. In normal cases, 568 really_put_req(ctx, req);
624 * we would not be holding the last reference to the file*, so
625 * this function will be executed w/out any aio kthread wakeup.
626 */
627 if (unlikely(!fput_atomic(req->ki_filp))) {
628 spin_lock(&fput_lock);
629 list_add(&req->ki_list, &fput_head);
630 spin_unlock(&fput_lock);
631 schedule_work(&fput_work);
632 } else {
633 req->ki_filp = NULL;
634 really_put_req(ctx, req);
635 }
636 return 1; 569 return 1;
637} 570}
638 571
diff --git a/fs/attr.c b/fs/attr.c
index 0da90951d277..29e38a1f7f77 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -171,6 +171,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
171 struct timespec now; 171 struct timespec now;
172 unsigned int ia_valid = attr->ia_valid; 172 unsigned int ia_valid = attr->ia_valid;
173 173
174 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
175
174 if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { 176 if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
175 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 177 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
176 return -EPERM; 178 return -EPERM;
@@ -250,5 +252,4 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
250 252
251 return error; 253 return error;
252} 254}
253
254EXPORT_SYMBOL(notify_change); 255EXPORT_SYMBOL(notify_change);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index aa9103f8f01b..abf645c1703b 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -257,8 +257,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
257 * corresponding to the autofs fs we want to open. 257 * corresponding to the autofs fs we want to open.
258 */ 258 */
259 259
260 filp = dentry_open(path.dentry, path.mnt, O_RDONLY, 260 filp = dentry_open(&path, O_RDONLY, current_cred());
261 current_cred()); 261 path_put(&path);
262 if (IS_ERR(filp)) { 262 if (IS_ERR(filp)) {
263 err = PTR_ERR(filp); 263 err = PTR_ERR(filp);
264 goto out; 264 goto out;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 75e5f1c8e028..e7396cfdb109 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -32,7 +32,7 @@ static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
32static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); 32static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
33#endif 33#endif
34static int autofs4_dir_open(struct inode *inode, struct file *file); 34static int autofs4_dir_open(struct inode *inode, struct file *file);
35static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 35static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int);
36static struct vfsmount *autofs4_d_automount(struct path *); 36static struct vfsmount *autofs4_d_automount(struct path *);
37static int autofs4_d_manage(struct dentry *, bool); 37static int autofs4_d_manage(struct dentry *, bool);
38static void autofs4_dentry_release(struct dentry *); 38static void autofs4_dentry_release(struct dentry *);
@@ -458,7 +458,7 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
458} 458}
459 459
460/* Lookups in the root directory */ 460/* Lookups in the root directory */
461static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 461static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
462{ 462{
463 struct autofs_sb_info *sbi; 463 struct autofs_sb_info *sbi;
464 struct autofs_info *ino; 464 struct autofs_info *ino;
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 1b35d6bd06b0..b1342ffb3cf6 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -173,13 +173,13 @@ static const struct file_operations bad_file_ops =
173}; 173};
174 174
175static int bad_inode_create (struct inode *dir, struct dentry *dentry, 175static int bad_inode_create (struct inode *dir, struct dentry *dentry,
176 umode_t mode, struct nameidata *nd) 176 umode_t mode, bool excl)
177{ 177{
178 return -EIO; 178 return -EIO;
179} 179}
180 180
181static struct dentry *bad_inode_lookup(struct inode *dir, 181static struct dentry *bad_inode_lookup(struct inode *dir,
182 struct dentry *dentry, struct nameidata *nd) 182 struct dentry *dentry, unsigned int flags)
183{ 183{
184 return ERR_PTR(-EIO); 184 return ERR_PTR(-EIO);
185} 185}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e18da23d42b5..cf7f3c67c8b7 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -34,7 +34,7 @@ static int befs_readdir(struct file *, void *, filldir_t);
34static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); 34static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int);
35static int befs_readpage(struct file *file, struct page *page); 35static int befs_readpage(struct file *file, struct page *page);
36static sector_t befs_bmap(struct address_space *mapping, sector_t block); 36static sector_t befs_bmap(struct address_space *mapping, sector_t block);
37static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *); 37static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int);
38static struct inode *befs_iget(struct super_block *, unsigned long); 38static struct inode *befs_iget(struct super_block *, unsigned long);
39static struct inode *befs_alloc_inode(struct super_block *sb); 39static struct inode *befs_alloc_inode(struct super_block *sb);
40static void befs_destroy_inode(struct inode *inode); 40static void befs_destroy_inode(struct inode *inode);
@@ -159,7 +159,7 @@ befs_get_block(struct inode *inode, sector_t block,
159} 159}
160 160
161static struct dentry * 161static struct dentry *
162befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 162befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
163{ 163{
164 struct inode *inode = NULL; 164 struct inode *inode = NULL;
165 struct super_block *sb = dir->i_sb; 165 struct super_block *sb = dir->i_sb;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d12c7966db27..2785ef91191a 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -85,7 +85,7 @@ const struct file_operations bfs_dir_operations = {
85extern void dump_imap(const char *, struct super_block *); 85extern void dump_imap(const char *, struct super_block *);
86 86
87static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 87static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
88 struct nameidata *nd) 88 bool excl)
89{ 89{
90 int err; 90 int err;
91 struct inode *inode; 91 struct inode *inode;
@@ -133,7 +133,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
133} 133}
134 134
135static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, 135static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
136 struct nameidata *nd) 136 unsigned int flags)
137{ 137{
138 struct inode *inode = NULL; 138 struct inode *inode = NULL;
139 struct buffer_head *bh; 139 struct buffer_head *bh;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c2bbe1fb1326..1e519195d45b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1710,3 +1710,39 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
1710 return res; 1710 return res;
1711} 1711}
1712EXPORT_SYMBOL(__invalidate_device); 1712EXPORT_SYMBOL(__invalidate_device);
1713
1714void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
1715{
1716 struct inode *inode, *old_inode = NULL;
1717
1718 spin_lock(&inode_sb_list_lock);
1719 list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
1720 struct address_space *mapping = inode->i_mapping;
1721
1722 spin_lock(&inode->i_lock);
1723 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
1724 mapping->nrpages == 0) {
1725 spin_unlock(&inode->i_lock);
1726 continue;
1727 }
1728 __iget(inode);
1729 spin_unlock(&inode->i_lock);
1730 spin_unlock(&inode_sb_list_lock);
1731 /*
1732 * We hold a reference to 'inode' so it couldn't have been
1733 * removed from s_inodes list while we dropped the
1734 * inode_sb_list_lock. We cannot iput the inode now as we can
1735 * be holding the last reference and we cannot iput it under
1736 * inode_sb_list_lock. So we keep the reference and iput it
1737 * later.
1738 */
1739 iput(old_inode);
1740 old_inode = inode;
1741
1742 func(I_BDEV(inode), arg);
1743
1744 spin_lock(&inode_sb_list_lock);
1745 }
1746 spin_unlock(&inode_sb_list_lock);
1747 iput(old_inode);
1748}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 01c21b6c6d43..deafe19c34b5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -929,7 +929,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
929 929
930 930
931/** 931/**
932 * convert_extent - convert all bits in a given range from one bit to another 932 * convert_extent_bit - convert all bits in a given range from one bit to
933 * another
933 * @tree: the io tree to search 934 * @tree: the io tree to search
934 * @start: the start offset in bytes 935 * @start: the start offset in bytes
935 * @end: the end offset in bytes (inclusive) 936 * @end: the end offset in bytes (inclusive)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a7d1921ac76b..fb8d671d00e6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4247,7 +4247,7 @@ static void btrfs_dentry_release(struct dentry *dentry)
4247} 4247}
4248 4248
4249static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 4249static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4250 struct nameidata *nd) 4250 unsigned int flags)
4251{ 4251{
4252 struct dentry *ret; 4252 struct dentry *ret;
4253 4253
@@ -4893,7 +4893,7 @@ out_unlock:
4893} 4893}
4894 4894
4895static int btrfs_create(struct inode *dir, struct dentry *dentry, 4895static int btrfs_create(struct inode *dir, struct dentry *dentry,
4896 umode_t mode, struct nameidata *nd) 4896 umode_t mode, bool excl)
4897{ 4897{
4898 struct btrfs_trans_handle *trans; 4898 struct btrfs_trans_handle *trans;
4899 struct btrfs_root *root = BTRFS_I(dir)->root; 4899 struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -6987,7 +6987,7 @@ void btrfs_destroy_inode(struct inode *inode)
6987 struct btrfs_ordered_extent *ordered; 6987 struct btrfs_ordered_extent *ordered;
6988 struct btrfs_root *root = BTRFS_I(inode)->root; 6988 struct btrfs_root *root = BTRFS_I(inode)->root;
6989 6989
6990 WARN_ON(!list_empty(&inode->i_dentry)); 6990 WARN_ON(!hlist_empty(&inode->i_dentry));
6991 WARN_ON(inode->i_data.nrpages); 6991 WARN_ON(inode->i_data.nrpages);
6992 WARN_ON(BTRFS_I(inode)->outstanding_extents); 6992 WARN_ON(BTRFS_I(inode)->outstanding_extents);
6993 WARN_ON(BTRFS_I(inode)->reserved_extents); 6993 WARN_ON(BTRFS_I(inode)->reserved_extents);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0e92e5763005..1e9f6c019ad0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3268,7 +3268,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3268 if (fs_info->sb->s_flags & MS_RDONLY) 3268 if (fs_info->sb->s_flags & MS_RDONLY)
3269 return -EROFS; 3269 return -EROFS;
3270 3270
3271 ret = mnt_want_write(file->f_path.mnt); 3271 ret = mnt_want_write_file(file);
3272 if (ret) 3272 if (ret)
3273 return ret; 3273 return ret;
3274 3274
@@ -3338,7 +3338,7 @@ out_bargs:
3338out: 3338out:
3339 mutex_unlock(&fs_info->balance_mutex); 3339 mutex_unlock(&fs_info->balance_mutex);
3340 mutex_unlock(&fs_info->volume_mutex); 3340 mutex_unlock(&fs_info->volume_mutex);
3341 mnt_drop_write(file->f_path.mnt); 3341 mnt_drop_write_file(file);
3342 return ret; 3342 return ret;
3343} 3343}
3344 3344
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e23991574fdf..b19d75567728 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1068,7 +1068,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1068 } 1068 }
1069 1069
1070 bdev = fs_devices->latest_bdev; 1070 bdev = fs_devices->latest_bdev;
1071 s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info); 1071 s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC,
1072 fs_info);
1072 if (IS_ERR(s)) { 1073 if (IS_ERR(s)) {
1073 error = PTR_ERR(s); 1074 error = PTR_ERR(s);
1074 goto error_close_devices; 1075 goto error_close_devices;
@@ -1082,7 +1083,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
1082 } else { 1083 } else {
1083 char b[BDEVNAME_SIZE]; 1084 char b[BDEVNAME_SIZE];
1084 1085
1085 s->s_flags = flags | MS_NOSEC;
1086 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 1086 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1087 btrfs_sb(s)->bdev_holder = fs_type; 1087 btrfs_sb(s)->bdev_holder = fs_type;
1088 error = btrfs_fill_super(s, fs_devices, data, 1088 error = btrfs_fill_super(s, fs_devices, data,
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 7f0771d3894e..b0b5f7cdfffa 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -567,7 +567,7 @@ lookup_again:
567 if (ret < 0) 567 if (ret < 0)
568 goto create_error; 568 goto create_error;
569 start = jiffies; 569 start = jiffies;
570 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); 570 ret = vfs_create(dir->d_inode, next, S_IFREG, true);
571 cachefiles_hist(cachefiles_create_histogram, start); 571 cachefiles_hist(cachefiles_create_histogram, start);
572 if (ret < 0) 572 if (ret < 0)
573 goto create_error; 573 goto create_error;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0e3c0924cc3a..c0353dfac51f 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -891,6 +891,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
891 struct cachefiles_cache *cache; 891 struct cachefiles_cache *cache;
892 mm_segment_t old_fs; 892 mm_segment_t old_fs;
893 struct file *file; 893 struct file *file;
894 struct path path;
894 loff_t pos, eof; 895 loff_t pos, eof;
895 size_t len; 896 size_t len;
896 void *data; 897 void *data;
@@ -916,10 +917,9 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
916 917
917 /* write the page to the backing filesystem and let it store it in its 918 /* write the page to the backing filesystem and let it store it in its
918 * own time */ 919 * own time */
919 dget(object->backer); 920 path.mnt = cache->mnt;
920 mntget(cache->mnt); 921 path.dentry = object->backer;
921 file = dentry_open(object->backer, cache->mnt, O_RDWR, 922 file = dentry_open(&path, O_RDWR, cache->cache_cred);
922 cache->cache_cred);
923 if (IS_ERR(file)) { 923 if (IS_ERR(file)) {
924 ret = PTR_ERR(file); 924 ret = PTR_ERR(file);
925 } else { 925 } else {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 3e8094be4604..00894ff9246c 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -576,7 +576,7 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry)
576 * the MDS so that it gets our 'caps wanted' value in a single op. 576 * the MDS so that it gets our 'caps wanted' value in a single op.
577 */ 577 */
578static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, 578static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
579 struct nameidata *nd) 579 unsigned int flags)
580{ 580{
581 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 581 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
582 struct ceph_mds_client *mdsc = fsc->mdsc; 582 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -594,14 +594,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
594 if (err < 0) 594 if (err < 0)
595 return ERR_PTR(err); 595 return ERR_PTR(err);
596 596
597 /* open (but not create!) intent? */
598 if (nd &&
599 (nd->flags & LOOKUP_OPEN) &&
600 !(nd->intent.open.flags & O_CREAT)) {
601 int mode = nd->intent.open.create_mode & ~current->fs->umask;
602 return ceph_lookup_open(dir, dentry, nd, mode, 1);
603 }
604
605 /* can we conclude ENOENT locally? */ 597 /* can we conclude ENOENT locally? */
606 if (dentry->d_inode == NULL) { 598 if (dentry->d_inode == NULL) {
607 struct ceph_inode_info *ci = ceph_inode(dir); 599 struct ceph_inode_info *ci = ceph_inode(dir);
@@ -642,13 +634,51 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
642 return dentry; 634 return dentry;
643} 635}
644 636
637int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
638 struct file *file, unsigned flags, umode_t mode,
639 int *opened)
640{
641 int err;
642 struct dentry *res = NULL;
643
644 if (!(flags & O_CREAT)) {
645 if (dentry->d_name.len > NAME_MAX)
646 return -ENAMETOOLONG;
647
648 err = ceph_init_dentry(dentry);
649 if (err < 0)
650 return err;
651
652 return ceph_lookup_open(dir, dentry, file, flags, mode, opened);
653 }
654
655 if (d_unhashed(dentry)) {
656 res = ceph_lookup(dir, dentry, 0);
657 if (IS_ERR(res))
658 return PTR_ERR(res);
659
660 if (res)
661 dentry = res;
662 }
663
664 /* We don't deal with positive dentries here */
665 if (dentry->d_inode)
666 return finish_no_open(file, res);
667
668 *opened |= FILE_CREATED;
669 err = ceph_lookup_open(dir, dentry, file, flags, mode, opened);
670 dput(res);
671
672 return err;
673}
674
645/* 675/*
646 * If we do a create but get no trace back from the MDS, follow up with 676 * If we do a create but get no trace back from the MDS, follow up with
647 * a lookup (the VFS expects us to link up the provided dentry). 677 * a lookup (the VFS expects us to link up the provided dentry).
648 */ 678 */
649int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) 679int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
650{ 680{
651 struct dentry *result = ceph_lookup(dir, dentry, NULL); 681 struct dentry *result = ceph_lookup(dir, dentry, 0);
652 682
653 if (result && !IS_ERR(result)) { 683 if (result && !IS_ERR(result)) {
654 /* 684 /*
@@ -700,25 +730,9 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
700} 730}
701 731
702static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, 732static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
703 struct nameidata *nd) 733 bool excl)
704{ 734{
705 dout("create in dir %p dentry %p name '%.*s'\n", 735 return ceph_mknod(dir, dentry, mode, 0);
706 dir, dentry, dentry->d_name.len, dentry->d_name.name);
707
708 if (ceph_snap(dir) != CEPH_NOSNAP)
709 return -EROFS;
710
711 if (nd) {
712 BUG_ON((nd->flags & LOOKUP_OPEN) == 0);
713 dentry = ceph_lookup_open(dir, dentry, nd, mode, 0);
714 /* hrm, what should i do here if we get aliased? */
715 if (IS_ERR(dentry))
716 return PTR_ERR(dentry);
717 return 0;
718 }
719
720 /* fall back to mknod */
721 return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0);
722} 736}
723 737
724static int ceph_symlink(struct inode *dir, struct dentry *dentry, 738static int ceph_symlink(struct inode *dir, struct dentry *dentry,
@@ -1028,12 +1042,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
1028/* 1042/*
1029 * Check if cached dentry can be trusted. 1043 * Check if cached dentry can be trusted.
1030 */ 1044 */
1031static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) 1045static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
1032{ 1046{
1033 int valid = 0; 1047 int valid = 0;
1034 struct inode *dir; 1048 struct inode *dir;
1035 1049
1036 if (nd && nd->flags & LOOKUP_RCU) 1050 if (flags & LOOKUP_RCU)
1037 return -ECHILD; 1051 return -ECHILD;
1038 1052
1039 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, 1053 dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
@@ -1080,7 +1094,7 @@ static void ceph_d_release(struct dentry *dentry)
1080} 1094}
1081 1095
1082static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1096static int ceph_snapdir_d_revalidate(struct dentry *dentry,
1083 struct nameidata *nd) 1097 unsigned int flags)
1084{ 1098{
1085 /* 1099 /*
1086 * Eventually, we'll want to revalidate snapped metadata 1100 * Eventually, we'll want to revalidate snapped metadata
@@ -1357,6 +1371,7 @@ const struct inode_operations ceph_dir_iops = {
1357 .rmdir = ceph_unlink, 1371 .rmdir = ceph_unlink,
1358 .rename = ceph_rename, 1372 .rename = ceph_rename,
1359 .create = ceph_create, 1373 .create = ceph_create,
1374 .atomic_open = ceph_atomic_open,
1360}; 1375};
1361 1376
1362const struct dentry_operations ceph_dentry_ops = { 1377const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 988d4f302e48..1b81d6c31878 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -213,22 +213,15 @@ out:
213 * may_open() fails, the struct *file gets cleaned up (i.e. 213 * may_open() fails, the struct *file gets cleaned up (i.e.
214 * ceph_release gets called). So fear not! 214 * ceph_release gets called). So fear not!
215 */ 215 */
216/* 216int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
217 * flags 217 struct file *file, unsigned flags, umode_t mode,
218 * path_lookup_open -> LOOKUP_OPEN 218 int *opened)
219 * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE
220 */
221struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
222 struct nameidata *nd, int mode,
223 int locked_dir)
224{ 219{
225 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 220 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
226 struct ceph_mds_client *mdsc = fsc->mdsc; 221 struct ceph_mds_client *mdsc = fsc->mdsc;
227 struct file *file;
228 struct ceph_mds_request *req; 222 struct ceph_mds_request *req;
229 struct dentry *ret; 223 struct dentry *ret;
230 int err; 224 int err;
231 int flags = nd->intent.open.flags;
232 225
233 dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", 226 dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
234 dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); 227 dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -236,7 +229,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
236 /* do the open */ 229 /* do the open */
237 req = prepare_open_request(dir->i_sb, flags, mode); 230 req = prepare_open_request(dir->i_sb, flags, mode);
238 if (IS_ERR(req)) 231 if (IS_ERR(req))
239 return ERR_CAST(req); 232 return PTR_ERR(req);
240 req->r_dentry = dget(dentry); 233 req->r_dentry = dget(dentry);
241 req->r_num_caps = 2; 234 req->r_num_caps = 2;
242 if (flags & O_CREAT) { 235 if (flags & O_CREAT) {
@@ -254,14 +247,17 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
254 err = ceph_handle_notrace_create(dir, dentry); 247 err = ceph_handle_notrace_create(dir, dentry);
255 if (err) 248 if (err)
256 goto out; 249 goto out;
257 file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open); 250 err = finish_open(file, req->r_dentry, ceph_open, opened);
258 if (IS_ERR(file))
259 err = PTR_ERR(file);
260out: 251out:
261 ret = ceph_finish_lookup(req, dentry, err); 252 ret = ceph_finish_lookup(req, dentry, err);
262 ceph_mdsc_put_request(req); 253 ceph_mdsc_put_request(req);
263 dout("ceph_lookup_open result=%p\n", ret); 254 dout("ceph_lookup_open result=%p\n", ret);
264 return ret; 255
256 if (IS_ERR(ret))
257 return PTR_ERR(ret);
258
259 dput(ret);
260 return err;
265} 261}
266 262
267int ceph_release(struct inode *inode, struct file *file) 263int ceph_release(struct inode *inode, struct file *file)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 1e67dd7305a4..7076109f014d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -871,7 +871,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
871 871
872 if (ceph_test_opt(fsc->client, NOSHARE)) 872 if (ceph_test_opt(fsc->client, NOSHARE))
873 compare_super = NULL; 873 compare_super = NULL;
874 sb = sget(fs_type, compare_super, ceph_set_super, fsc); 874 sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
875 if (IS_ERR(sb)) { 875 if (IS_ERR(sb)) {
876 res = ERR_CAST(sb); 876 res = ERR_CAST(sb);
877 goto out; 877 goto out;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index fc35036d258d..f4d5522cb619 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages,
806 loff_t off, size_t len); 806 loff_t off, size_t len);
807extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); 807extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
808extern int ceph_open(struct inode *inode, struct file *file); 808extern int ceph_open(struct inode *inode, struct file *file);
809extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, 809extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
810 struct nameidata *nd, int mode, 810 struct file *od, unsigned flags,
811 int locked_dir); 811 umode_t mode, int *opened);
812extern int ceph_release(struct inode *inode, struct file *filp); 812extern int ceph_release(struct inode *inode, struct file *filp);
813 813
814/* dir.c */ 814/* dir.c */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8b6e344eb0ba..a7610cfedf0a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -257,7 +257,6 @@ cifs_alloc_inode(struct super_block *sb)
257static void cifs_i_callback(struct rcu_head *head) 257static void cifs_i_callback(struct rcu_head *head)
258{ 258{
259 struct inode *inode = container_of(head, struct inode, i_rcu); 259 struct inode *inode = container_of(head, struct inode, i_rcu);
260 INIT_LIST_HEAD(&inode->i_dentry);
261 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); 260 kmem_cache_free(cifs_inode_cachep, CIFS_I(inode));
262} 261}
263 262
@@ -638,7 +637,10 @@ cifs_do_mount(struct file_system_type *fs_type,
638 mnt_data.cifs_sb = cifs_sb; 637 mnt_data.cifs_sb = cifs_sb;
639 mnt_data.flags = flags; 638 mnt_data.flags = flags;
640 639
641 sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data); 640 /* BB should we make this contingent on mount parm? */
641 flags |= MS_NODIRATIME | MS_NOATIME;
642
643 sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data);
642 if (IS_ERR(sb)) { 644 if (IS_ERR(sb)) {
643 root = ERR_CAST(sb); 645 root = ERR_CAST(sb);
644 cifs_umount(cifs_sb); 646 cifs_umount(cifs_sb);
@@ -649,10 +651,6 @@ cifs_do_mount(struct file_system_type *fs_type,
649 cFYI(1, "Use existing superblock"); 651 cFYI(1, "Use existing superblock");
650 cifs_umount(cifs_sb); 652 cifs_umount(cifs_sb);
651 } else { 653 } else {
652 sb->s_flags = flags;
653 /* BB should we make this contingent on mount parm? */
654 sb->s_flags |= MS_NODIRATIME | MS_NOATIME;
655
656 rc = cifs_read_super(sb); 654 rc = cifs_read_super(sb);
657 if (rc) { 655 if (rc) {
658 root = ERR_PTR(rc); 656 root = ERR_PTR(rc);
@@ -778,6 +776,7 @@ struct file_system_type cifs_fs_type = {
778}; 776};
779const struct inode_operations cifs_dir_inode_ops = { 777const struct inode_operations cifs_dir_inode_ops = {
780 .create = cifs_create, 778 .create = cifs_create,
779 .atomic_open = cifs_atomic_open,
781 .lookup = cifs_lookup, 780 .lookup = cifs_lookup,
782 .getattr = cifs_getattr, 781 .getattr = cifs_getattr,
783 .unlink = cifs_unlink, 782 .unlink = cifs_unlink,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 65365358c976..1c49c5a9b27a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -45,9 +45,12 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf;
45extern const struct inode_operations cifs_dir_inode_ops; 45extern const struct inode_operations cifs_dir_inode_ops;
46extern struct inode *cifs_root_iget(struct super_block *); 46extern struct inode *cifs_root_iget(struct super_block *);
47extern int cifs_create(struct inode *, struct dentry *, umode_t, 47extern int cifs_create(struct inode *, struct dentry *, umode_t,
48 struct nameidata *); 48 bool excl);
49extern int cifs_atomic_open(struct inode *, struct dentry *,
50 struct file *, unsigned, umode_t,
51 int *);
49extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 52extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
50 struct nameidata *); 53 unsigned int);
51extern int cifs_unlink(struct inode *dir, struct dentry *dentry); 54extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
52extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); 55extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
53extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t); 56extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 5b400730c213..4ee522b3f66f 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -86,7 +86,31 @@ static struct {
86#endif /* CONFIG_CIFS_WEAK_PW_HASH */ 86#endif /* CONFIG_CIFS_WEAK_PW_HASH */
87#endif /* CIFS_POSIX */ 87#endif /* CIFS_POSIX */
88 88
89/* Forward declarations */ 89#ifdef CONFIG_HIGHMEM
90/*
91 * On arches that have high memory, kmap address space is limited. By
92 * serializing the kmap operations on those arches, we ensure that we don't
93 * end up with a bunch of threads in writeback with partially mapped page
94 * arrays, stuck waiting for kmap to come back. That situation prevents
95 * progress and can deadlock.
96 */
97static DEFINE_MUTEX(cifs_kmap_mutex);
98
99static inline void
100cifs_kmap_lock(void)
101{
102 mutex_lock(&cifs_kmap_mutex);
103}
104
105static inline void
106cifs_kmap_unlock(void)
107{
108 mutex_unlock(&cifs_kmap_mutex);
109}
110#else /* !CONFIG_HIGHMEM */
111#define cifs_kmap_lock() do { ; } while(0)
112#define cifs_kmap_unlock() do { ; } while(0)
113#endif /* CONFIG_HIGHMEM */
90 114
91/* Mark as invalid, all open files on tree connections since they 115/* Mark as invalid, all open files on tree connections since they
92 were closed when session to server was lost */ 116 were closed when session to server was lost */
@@ -1503,7 +1527,9 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
1503 } 1527 }
1504 1528
1505 /* marshal up the page array */ 1529 /* marshal up the page array */
1530 cifs_kmap_lock();
1506 len = rdata->marshal_iov(rdata, data_len); 1531 len = rdata->marshal_iov(rdata, data_len);
1532 cifs_kmap_unlock();
1507 data_len -= len; 1533 data_len -= len;
1508 1534
1509 /* issue the read if we have any iovecs left to fill */ 1535 /* issue the read if we have any iovecs left to fill */
@@ -2069,7 +2095,9 @@ cifs_async_writev(struct cifs_writedata *wdata)
2069 * and set the iov_len properly for each one. It may also set 2095 * and set the iov_len properly for each one. It may also set
2070 * wdata->bytes too. 2096 * wdata->bytes too.
2071 */ 2097 */
2098 cifs_kmap_lock();
2072 wdata->marshal_iov(iov, wdata); 2099 wdata->marshal_iov(iov, wdata);
2100 cifs_kmap_unlock();
2073 2101
2074 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes); 2102 cFYI(1, "async write at %llu %u bytes", wdata->offset, wdata->bytes);
2075 2103
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 0ae86ddf2213..94b7788c3189 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3445,6 +3445,18 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
3445#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024) 3445#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
3446#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536) 3446#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)
3447 3447
3448/*
3449 * On hosts with high memory, we can't currently support wsize/rsize that are
3450 * larger than we can kmap at once. Cap the rsize/wsize at
3451 * LAST_PKMAP * PAGE_SIZE. We'll never be able to fill a read or write request
3452 * larger than that anyway.
3453 */
3454#ifdef CONFIG_HIGHMEM
3455#define CIFS_KMAP_SIZE_LIMIT (LAST_PKMAP * PAGE_CACHE_SIZE)
3456#else /* CONFIG_HIGHMEM */
3457#define CIFS_KMAP_SIZE_LIMIT (1<<24)
3458#endif /* CONFIG_HIGHMEM */
3459
3448static unsigned int 3460static unsigned int
3449cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) 3461cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3450{ 3462{
@@ -3475,6 +3487,9 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3475 wsize = min_t(unsigned int, wsize, 3487 wsize = min_t(unsigned int, wsize,
3476 server->maxBuf - sizeof(WRITE_REQ) + 4); 3488 server->maxBuf - sizeof(WRITE_REQ) + 4);
3477 3489
3490 /* limit to the amount that we can kmap at once */
3491 wsize = min_t(unsigned int, wsize, CIFS_KMAP_SIZE_LIMIT);
3492
3478 /* hard limit of CIFS_MAX_WSIZE */ 3493 /* hard limit of CIFS_MAX_WSIZE */
3479 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE); 3494 wsize = min_t(unsigned int, wsize, CIFS_MAX_WSIZE);
3480 3495
@@ -3516,6 +3531,9 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info)
3516 if (!(server->capabilities & CAP_LARGE_READ_X)) 3531 if (!(server->capabilities & CAP_LARGE_READ_X))
3517 rsize = min_t(unsigned int, CIFSMaxBufSize, rsize); 3532 rsize = min_t(unsigned int, CIFSMaxBufSize, rsize);
3518 3533
3534 /* limit to the amount that we can kmap at once */
3535 rsize = min_t(unsigned int, rsize, CIFS_KMAP_SIZE_LIMIT);
3536
3519 /* hard limit of CIFS_MAX_RSIZE */ 3537 /* hard limit of CIFS_MAX_RSIZE */
3520 rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE); 3538 rsize = min_t(unsigned int, rsize, CIFS_MAX_RSIZE);
3521 3539
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index ec4e9a2a12f8..a180265a10b5 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -133,108 +133,141 @@ cifs_bp_rename_retry:
133 return full_path; 133 return full_path;
134} 134}
135 135
136/*
137 * Don't allow the separator character in a path component.
138 * The VFS will not allow "/", but "\" is allowed by posix.
139 */
140static int
141check_name(struct dentry *direntry)
142{
143 struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
144 int i;
145
146 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
147 for (i = 0; i < direntry->d_name.len; i++) {
148 if (direntry->d_name.name[i] == '\\') {
149 cFYI(1, "Invalid file name");
150 return -EINVAL;
151 }
152 }
153 }
154 return 0;
155}
156
157
136/* Inode operations in similar order to how they appear in Linux file fs.h */ 158/* Inode operations in similar order to how they appear in Linux file fs.h */
137 159
138int 160static int cifs_do_create(struct inode *inode, struct dentry *direntry,
139cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, 161 int xid, struct tcon_link *tlink, unsigned oflags,
140 struct nameidata *nd) 162 umode_t mode, __u32 *oplock, __u16 *fileHandle,
163 int *created)
141{ 164{
142 int rc = -ENOENT; 165 int rc = -ENOENT;
143 int xid;
144 int create_options = CREATE_NOT_DIR; 166 int create_options = CREATE_NOT_DIR;
145 __u32 oplock = 0; 167 int desiredAccess;
146 int oflags; 168 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
147 /* 169 struct cifs_tcon *tcon = tlink_tcon(tlink);
148 * BB below access is probably too much for mknod to request
149 * but we have to do query and setpathinfo so requesting
150 * less could fail (unless we want to request getatr and setatr
151 * permissions (only). At least for POSIX we do not have to
152 * request so much.
153 */
154 int desiredAccess = GENERIC_READ | GENERIC_WRITE;
155 __u16 fileHandle;
156 struct cifs_sb_info *cifs_sb;
157 struct tcon_link *tlink;
158 struct cifs_tcon *tcon;
159 char *full_path = NULL; 170 char *full_path = NULL;
160 FILE_ALL_INFO *buf = NULL; 171 FILE_ALL_INFO *buf = NULL;
161 struct inode *newinode = NULL; 172 struct inode *newinode = NULL;
162 int disposition = FILE_OVERWRITE_IF; 173 int disposition;
163
164 xid = GetXid();
165
166 cifs_sb = CIFS_SB(inode->i_sb);
167 tlink = cifs_sb_tlink(cifs_sb);
168 if (IS_ERR(tlink)) {
169 FreeXid(xid);
170 return PTR_ERR(tlink);
171 }
172 tcon = tlink_tcon(tlink);
173 174
175 *oplock = 0;
174 if (tcon->ses->server->oplocks) 176 if (tcon->ses->server->oplocks)
175 oplock = REQ_OPLOCK; 177 *oplock = REQ_OPLOCK;
176
177 if (nd)
178 oflags = nd->intent.open.file->f_flags;
179 else
180 oflags = O_RDONLY | O_CREAT;
181 178
182 full_path = build_path_from_dentry(direntry); 179 full_path = build_path_from_dentry(direntry);
183 if (full_path == NULL) { 180 if (full_path == NULL) {
184 rc = -ENOMEM; 181 rc = -ENOMEM;
185 goto cifs_create_out; 182 goto out;
186 } 183 }
187 184
188 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && 185 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
186 !tcon->broken_posix_open &&
189 (CIFS_UNIX_POSIX_PATH_OPS_CAP & 187 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
190 le64_to_cpu(tcon->fsUnixInfo.Capability))) { 188 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
191 rc = cifs_posix_open(full_path, &newinode, 189 rc = cifs_posix_open(full_path, &newinode,
192 inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); 190 inode->i_sb, mode, oflags, oplock, fileHandle, xid);
193 /* EIO could indicate that (posix open) operation is not 191 switch (rc) {
194 supported, despite what server claimed in capability 192 case 0:
195 negotiation. EREMOTE indicates DFS junction, which is not 193 if (newinode == NULL) {
196 handled in posix open */ 194 /* query inode info */
197
198 if (rc == 0) {
199 if (newinode == NULL) /* query inode info */
200 goto cifs_create_get_file_info; 195 goto cifs_create_get_file_info;
201 else /* success, no need to query */ 196 }
202 goto cifs_create_set_dentry; 197
203 } else if ((rc != -EIO) && (rc != -EREMOTE) && 198 if (!S_ISREG(newinode->i_mode)) {
204 (rc != -EOPNOTSUPP) && (rc != -EINVAL)) 199 /*
205 goto cifs_create_out; 200 * The server may allow us to open things like
206 /* else fallthrough to retry, using older open call, this is 201 * FIFOs, but the client isn't set up to deal
207 case where server does not support this SMB level, and 202 * with that. If it's not a regular file, just
208 falsely claims capability (also get here for DFS case 203 * close it and proceed as if it were a normal
209 which should be rare for path not covered on files) */ 204 * lookup.
210 } 205 */
206 CIFSSMBClose(xid, tcon, *fileHandle);
207 goto cifs_create_get_file_info;
208 }
209 /* success, no need to query */
210 goto cifs_create_set_dentry;
211
212 case -ENOENT:
213 goto cifs_create_get_file_info;
214
215 case -EIO:
216 case -EINVAL:
217 /*
218 * EIO could indicate that (posix open) operation is not
219 * supported, despite what server claimed in capability
220 * negotiation.
221 *
222 * POSIX open in samba versions 3.3.1 and earlier could
223 * incorrectly fail with invalid parameter.
224 */
225 tcon->broken_posix_open = true;
226 break;
227
228 case -EREMOTE:
229 case -EOPNOTSUPP:
230 /*
231 * EREMOTE indicates DFS junction, which is not handled
232 * in posix open. If either that or op not supported
233 * returned, follow the normal lookup.
234 */
235 break;
211 236
212 if (nd) { 237 default:
213 /* if the file is going to stay open, then we 238 goto out;
214 need to set the desired access properly */ 239 }
215 desiredAccess = 0; 240 /*
216 if (OPEN_FMODE(oflags) & FMODE_READ) 241 * fallthrough to retry, using older open call, this is case
217 desiredAccess |= GENERIC_READ; /* is this too little? */ 242 * where server does not support this SMB level, and falsely
218 if (OPEN_FMODE(oflags) & FMODE_WRITE) 243 * claims capability (also get here for DFS case which should be
219 desiredAccess |= GENERIC_WRITE; 244 * rare for path not covered on files)
220 245 */
221 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
222 disposition = FILE_CREATE;
223 else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
224 disposition = FILE_OVERWRITE_IF;
225 else if ((oflags & O_CREAT) == O_CREAT)
226 disposition = FILE_OPEN_IF;
227 else
228 cFYI(1, "Create flag not set in create function");
229 } 246 }
230 247
248 desiredAccess = 0;
249 if (OPEN_FMODE(oflags) & FMODE_READ)
250 desiredAccess |= GENERIC_READ; /* is this too little? */
251 if (OPEN_FMODE(oflags) & FMODE_WRITE)
252 desiredAccess |= GENERIC_WRITE;
253
254 disposition = FILE_OVERWRITE_IF;
255 if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
256 disposition = FILE_CREATE;
257 else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
258 disposition = FILE_OVERWRITE_IF;
259 else if ((oflags & O_CREAT) == O_CREAT)
260 disposition = FILE_OPEN_IF;
261 else
262 cFYI(1, "Create flag not set in create function");
263
231 /* BB add processing to set equivalent of mode - e.g. via CreateX with 264 /* BB add processing to set equivalent of mode - e.g. via CreateX with
232 ACLs */ 265 ACLs */
233 266
234 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); 267 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
235 if (buf == NULL) { 268 if (buf == NULL) {
236 rc = -ENOMEM; 269 rc = -ENOMEM;
237 goto cifs_create_out; 270 goto out;
238 } 271 }
239 272
240 /* 273 /*
@@ -250,7 +283,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
250 if (tcon->ses->capabilities & CAP_NT_SMBS) 283 if (tcon->ses->capabilities & CAP_NT_SMBS)
251 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, 284 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
252 desiredAccess, create_options, 285 desiredAccess, create_options,
253 &fileHandle, &oplock, buf, cifs_sb->local_nls, 286 fileHandle, oplock, buf, cifs_sb->local_nls,
254 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 287 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
255 else 288 else
256 rc = -EIO; /* no NT SMB support fall into legacy open below */ 289 rc = -EIO; /* no NT SMB support fall into legacy open below */
@@ -259,17 +292,17 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
259 /* old server, retry the open legacy style */ 292 /* old server, retry the open legacy style */
260 rc = SMBLegacyOpen(xid, tcon, full_path, disposition, 293 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
261 desiredAccess, create_options, 294 desiredAccess, create_options,
262 &fileHandle, &oplock, buf, cifs_sb->local_nls, 295 fileHandle, oplock, buf, cifs_sb->local_nls,
263 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 296 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
264 } 297 }
265 if (rc) { 298 if (rc) {
266 cFYI(1, "cifs_create returned 0x%x", rc); 299 cFYI(1, "cifs_create returned 0x%x", rc);
267 goto cifs_create_out; 300 goto out;
268 } 301 }
269 302
270 /* If Open reported that we actually created a file 303 /* If Open reported that we actually created a file
271 then we now have to set the mode if possible */ 304 then we now have to set the mode if possible */
272 if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { 305 if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) {
273 struct cifs_unix_set_info_args args = { 306 struct cifs_unix_set_info_args args = {
274 .mode = mode, 307 .mode = mode,
275 .ctime = NO_CHANGE_64, 308 .ctime = NO_CHANGE_64,
@@ -278,6 +311,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
278 .device = 0, 311 .device = 0,
279 }; 312 };
280 313
314 *created |= FILE_CREATED;
281 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 315 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
282 args.uid = (__u64) current_fsuid(); 316 args.uid = (__u64) current_fsuid();
283 if (inode->i_mode & S_ISGID) 317 if (inode->i_mode & S_ISGID)
@@ -288,7 +322,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
288 args.uid = NO_CHANGE_64; 322 args.uid = NO_CHANGE_64;
289 args.gid = NO_CHANGE_64; 323 args.gid = NO_CHANGE_64;
290 } 324 }
291 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle, 325 CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle,
292 current->tgid); 326 current->tgid);
293 } else { 327 } else {
294 /* BB implement mode setting via Windows security 328 /* BB implement mode setting via Windows security
@@ -305,11 +339,11 @@ cifs_create_get_file_info:
305 inode->i_sb, xid); 339 inode->i_sb, xid);
306 else { 340 else {
307 rc = cifs_get_inode_info(&newinode, full_path, buf, 341 rc = cifs_get_inode_info(&newinode, full_path, buf,
308 inode->i_sb, xid, &fileHandle); 342 inode->i_sb, xid, fileHandle);
309 if (newinode) { 343 if (newinode) {
310 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) 344 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
311 newinode->i_mode = mode; 345 newinode->i_mode = mode;
312 if ((oplock & CIFS_CREATE_ACTION) && 346 if ((*oplock & CIFS_CREATE_ACTION) &&
313 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { 347 (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
314 newinode->i_uid = current_fsuid(); 348 newinode->i_uid = current_fsuid();
315 if (inode->i_mode & S_ISGID) 349 if (inode->i_mode & S_ISGID)
@@ -321,40 +355,139 @@ cifs_create_get_file_info:
321 } 355 }
322 356
323cifs_create_set_dentry: 357cifs_create_set_dentry:
324 if (rc == 0) 358 if (rc != 0) {
325 d_instantiate(direntry, newinode);
326 else
327 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); 359 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
360 goto out;
361 }
362 d_drop(direntry);
363 d_add(direntry, newinode);
328 364
329 if (newinode && nd) { 365 /* ENOENT for create? How weird... */
330 struct cifsFileInfo *pfile_info; 366 rc = -ENOENT;
331 struct file *filp; 367 if (!newinode) {
368 CIFSSMBClose(xid, tcon, *fileHandle);
369 goto out;
370 }
371 rc = 0;
332 372
333 filp = lookup_instantiate_filp(nd, direntry, generic_file_open); 373out:
334 if (IS_ERR(filp)) { 374 kfree(buf);
335 rc = PTR_ERR(filp); 375 kfree(full_path);
336 CIFSSMBClose(xid, tcon, fileHandle); 376 return rc;
337 goto cifs_create_out; 377}
338 }
339 378
340 pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock); 379int
341 if (pfile_info == NULL) { 380cifs_atomic_open(struct inode *inode, struct dentry *direntry,
342 fput(filp); 381 struct file *file, unsigned oflags, umode_t mode,
343 CIFSSMBClose(xid, tcon, fileHandle); 382 int *opened)
344 rc = -ENOMEM; 383{
345 } 384 int rc;
346 } else { 385 int xid;
386 struct tcon_link *tlink;
387 struct cifs_tcon *tcon;
388 __u16 fileHandle;
389 __u32 oplock;
390 struct file *filp;
391 struct cifsFileInfo *pfile_info;
392
393 /* Posix open is only called (at lookup time) for file create now. For
394 * opens (rather than creates), because we do not know if it is a file
395 * or directory yet, and current Samba no longer allows us to do posix
396 * open on dirs, we could end up wasting an open call on what turns out
397 * to be a dir. For file opens, we wait to call posix open till
398 * cifs_open. It could be added to atomic_open in the future but the
399 * performance tradeoff of the extra network request when EISDIR or
400 * EACCES is returned would have to be weighed against the 50% reduction
401 * in network traffic in the other paths.
402 */
403 if (!(oflags & O_CREAT)) {
404 struct dentry *res = cifs_lookup(inode, direntry, 0);
405 if (IS_ERR(res))
406 return PTR_ERR(res);
407
408 return finish_no_open(file, res);
409 }
410
411 rc = check_name(direntry);
412 if (rc)
413 return rc;
414
415 xid = GetXid();
416
417 cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p",
418 inode, direntry->d_name.name, direntry);
419
420 tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
421 filp = ERR_CAST(tlink);
422 if (IS_ERR(tlink))
423 goto free_xid;
424
425 tcon = tlink_tcon(tlink);
426
427 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
428 &oplock, &fileHandle, opened);
429
430 if (rc)
431 goto out;
432
433 rc = finish_open(file, direntry, generic_file_open, opened);
434 if (rc) {
347 CIFSSMBClose(xid, tcon, fileHandle); 435 CIFSSMBClose(xid, tcon, fileHandle);
436 goto out;
348 } 437 }
349 438
350cifs_create_out: 439 pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock);
351 kfree(buf); 440 if (pfile_info == NULL) {
352 kfree(full_path); 441 CIFSSMBClose(xid, tcon, fileHandle);
442 fput(filp);
443 rc = -ENOMEM;
444 }
445
446out:
353 cifs_put_tlink(tlink); 447 cifs_put_tlink(tlink);
448free_xid:
354 FreeXid(xid); 449 FreeXid(xid);
355 return rc; 450 return rc;
356} 451}
357 452
453int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
454 bool excl)
455{
456 int rc;
457 int xid = GetXid();
458 /*
459 * BB below access is probably too much for mknod to request
460 * but we have to do query and setpathinfo so requesting
461 * less could fail (unless we want to request getatr and setatr
462 * permissions (only). At least for POSIX we do not have to
463 * request so much.
464 */
465 unsigned oflags = O_EXCL | O_CREAT | O_RDWR;
466 struct tcon_link *tlink;
467 __u16 fileHandle;
468 __u32 oplock;
469 int created = FILE_CREATED;
470
471 cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p",
472 inode, direntry->d_name.name, direntry);
473
474 tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
475 rc = PTR_ERR(tlink);
476 if (IS_ERR(tlink))
477 goto free_xid;
478
479 rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode,
480 &oplock, &fileHandle, &created);
481 if (!rc)
482 CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle);
483
484 cifs_put_tlink(tlink);
485free_xid:
486 FreeXid(xid);
487
488 return rc;
489}
490
358int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, 491int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
359 dev_t device_number) 492 dev_t device_number)
360{ 493{
@@ -488,20 +621,15 @@ mknod_out:
488 621
489struct dentry * 622struct dentry *
490cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, 623cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
491 struct nameidata *nd) 624 unsigned int flags)
492{ 625{
493 int xid; 626 int xid;
494 int rc = 0; /* to get around spurious gcc warning, set to zero here */ 627 int rc = 0; /* to get around spurious gcc warning, set to zero here */
495 __u32 oplock;
496 __u16 fileHandle = 0;
497 bool posix_open = false;
498 struct cifs_sb_info *cifs_sb; 628 struct cifs_sb_info *cifs_sb;
499 struct tcon_link *tlink; 629 struct tcon_link *tlink;
500 struct cifs_tcon *pTcon; 630 struct cifs_tcon *pTcon;
501 struct cifsFileInfo *cfile;
502 struct inode *newInode = NULL; 631 struct inode *newInode = NULL;
503 char *full_path = NULL; 632 char *full_path = NULL;
504 struct file *filp;
505 633
506 xid = GetXid(); 634 xid = GetXid();
507 635
@@ -518,31 +646,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
518 } 646 }
519 pTcon = tlink_tcon(tlink); 647 pTcon = tlink_tcon(tlink);
520 648
521 oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0; 649 rc = check_name(direntry);
522 650 if (rc)
523 /*
524 * Don't allow the separator character in a path component.
525 * The VFS will not allow "/", but "\" is allowed by posix.
526 */
527 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
528 int i;
529 for (i = 0; i < direntry->d_name.len; i++)
530 if (direntry->d_name.name[i] == '\\') {
531 cFYI(1, "Invalid file name");
532 rc = -EINVAL;
533 goto lookup_out;
534 }
535 }
536
537 /*
538 * O_EXCL: optimize away the lookup, but don't hash the dentry. Let
539 * the VFS handle the create.
540 */
541 if (nd && (nd->flags & LOOKUP_EXCL)) {
542 d_instantiate(direntry, NULL);
543 rc = 0;
544 goto lookup_out; 651 goto lookup_out;
545 }
546 652
547 /* can not grab the rename sem here since it would 653 /* can not grab the rename sem here since it would
548 deadlock in the cases (beginning of sys_rename itself) 654 deadlock in the cases (beginning of sys_rename itself)
@@ -560,80 +666,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
560 } 666 }
561 cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode); 667 cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode);
562 668
563 /* Posix open is only called (at lookup time) for file create now.
564 * For opens (rather than creates), because we do not know if it
565 * is a file or directory yet, and current Samba no longer allows
566 * us to do posix open on dirs, we could end up wasting an open call
567 * on what turns out to be a dir. For file opens, we wait to call posix
568 * open till cifs_open. It could be added here (lookup) in the future
569 * but the performance tradeoff of the extra network request when EISDIR
570 * or EACCES is returned would have to be weighed against the 50%
571 * reduction in network traffic in the other paths.
572 */
573 if (pTcon->unix_ext) { 669 if (pTcon->unix_ext) {
574 if (nd && !(nd->flags & LOOKUP_DIRECTORY) && 670 rc = cifs_get_inode_info_unix(&newInode, full_path,
575 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && 671 parent_dir_inode->i_sb, xid);
576 (nd->intent.open.file->f_flags & O_CREAT)) { 672 } else {
577 rc = cifs_posix_open(full_path, &newInode,
578 parent_dir_inode->i_sb,
579 nd->intent.open.create_mode,
580 nd->intent.open.file->f_flags, &oplock,
581 &fileHandle, xid);
582 /*
583 * The check below works around a bug in POSIX
584 * open in samba versions 3.3.1 and earlier where
585 * open could incorrectly fail with invalid parameter.
586 * If either that or op not supported returned, follow
587 * the normal lookup.
588 */
589 switch (rc) {
590 case 0:
591 /*
592 * The server may allow us to open things like
593 * FIFOs, but the client isn't set up to deal
594 * with that. If it's not a regular file, just
595 * close it and proceed as if it were a normal
596 * lookup.
597 */
598 if (newInode && !S_ISREG(newInode->i_mode)) {
599 CIFSSMBClose(xid, pTcon, fileHandle);
600 break;
601 }
602 case -ENOENT:
603 posix_open = true;
604 case -EOPNOTSUPP:
605 break;
606 default:
607 pTcon->broken_posix_open = true;
608 }
609 }
610 if (!posix_open)
611 rc = cifs_get_inode_info_unix(&newInode, full_path,
612 parent_dir_inode->i_sb, xid);
613 } else
614 rc = cifs_get_inode_info(&newInode, full_path, NULL, 673 rc = cifs_get_inode_info(&newInode, full_path, NULL,
615 parent_dir_inode->i_sb, xid, NULL); 674 parent_dir_inode->i_sb, xid, NULL);
675 }
616 676
617 if ((rc == 0) && (newInode != NULL)) { 677 if ((rc == 0) && (newInode != NULL)) {
618 d_add(direntry, newInode); 678 d_add(direntry, newInode);
619 if (posix_open) {
620 filp = lookup_instantiate_filp(nd, direntry,
621 generic_file_open);
622 if (IS_ERR(filp)) {
623 rc = PTR_ERR(filp);
624 CIFSSMBClose(xid, pTcon, fileHandle);
625 goto lookup_out;
626 }
627
628 cfile = cifs_new_fileinfo(fileHandle, filp, tlink,
629 oplock);
630 if (cfile == NULL) {
631 fput(filp);
632 CIFSSMBClose(xid, pTcon, fileHandle);
633 rc = -ENOMEM;
634 goto lookup_out;
635 }
636 }
637 /* since paths are not looked up by component - the parent 679 /* since paths are not looked up by component - the parent
638 directories are presumed to be good here */ 680 directories are presumed to be good here */
639 renew_parental_timestamps(direntry); 681 renew_parental_timestamps(direntry);
@@ -658,9 +700,9 @@ lookup_out:
658} 700}
659 701
660static int 702static int
661cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) 703cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
662{ 704{
663 if (nd && (nd->flags & LOOKUP_RCU)) 705 if (flags & LOOKUP_RCU)
664 return -ECHILD; 706 return -ECHILD;
665 707
666 if (direntry->d_inode) { 708 if (direntry->d_inode) {
@@ -689,7 +731,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
689 * This may be nfsd (or something), anyway, we can't see the 731 * This may be nfsd (or something), anyway, we can't see the
690 * intent of this. So, since this can be for creation, drop it. 732 * intent of this. So, since this can be for creation, drop it.
691 */ 733 */
692 if (!nd) 734 if (!flags)
693 return 0; 735 return 0;
694 736
695 /* 737 /*
@@ -697,7 +739,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
697 * case sensitive name which is specified by user if this is 739 * case sensitive name which is specified by user if this is
698 * for creation. 740 * for creation.
699 */ 741 */
700 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 742 if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
701 return 0; 743 return 0;
702 744
703 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) 745 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 745da3d0653e..8e8bb49112ff 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -800,7 +800,7 @@ cifs_find_inode(struct inode *inode, void *opaque)
800 return 0; 800 return 0;
801 801
802 /* if it's not a directory or has no dentries, then flag it */ 802 /* if it's not a directory or has no dentries, then flag it */
803 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) 803 if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry))
804 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; 804 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
805 805
806 return 1; 806 return 1;
@@ -825,9 +825,10 @@ static bool
825inode_has_hashed_dentries(struct inode *inode) 825inode_has_hashed_dentries(struct inode *inode)
826{ 826{
827 struct dentry *dentry; 827 struct dentry *dentry;
828 struct hlist_node *p;
828 829
829 spin_lock(&inode->i_lock); 830 spin_lock(&inode->i_lock);
830 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 831 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
831 if (!d_unhashed(dentry) || IS_ROOT(dentry)) { 832 if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
832 spin_unlock(&inode->i_lock); 833 spin_unlock(&inode->i_lock);
833 return true; 834 return true;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0a8224d1c4c5..a4217f02fab2 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -86,9 +86,12 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
86 86
87 dentry = d_lookup(parent, name); 87 dentry = d_lookup(parent, name);
88 if (dentry) { 88 if (dentry) {
89 /* FIXME: check for inode number changes? */ 89 inode = dentry->d_inode;
90 if (dentry->d_inode != NULL) 90 /* update inode in place if i_ino didn't change */
91 if (inode && CIFS_I(inode)->uniqueid == fattr->cf_uniqueid) {
92 cifs_fattr_to_inode(inode, fattr);
91 return dentry; 93 return dentry;
94 }
92 d_drop(dentry); 95 d_drop(dentry);
93 dput(dentry); 96 dput(dentry);
94 } 97 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 3097ee58fd7d..f25d4ea14be4 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -365,16 +365,14 @@ cifs_setup_async_request(struct TCP_Server_Info *server, struct kvec *iov,
365 if (mid == NULL) 365 if (mid == NULL)
366 return -ENOMEM; 366 return -ENOMEM;
367 367
368 /* put it on the pending_mid_q */
369 spin_lock(&GlobalMid_Lock);
370 list_add_tail(&mid->qhead, &server->pending_mid_q);
371 spin_unlock(&GlobalMid_Lock);
372
373 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number); 368 rc = cifs_sign_smb2(iov, nvec, server, &mid->sequence_number);
374 if (rc) 369 if (rc) {
375 delete_mid(mid); 370 DeleteMidQEntry(mid);
371 return rc;
372 }
373
376 *ret_mid = mid; 374 *ret_mid = mid;
377 return rc; 375 return 0;
378} 376}
379 377
380/* 378/*
@@ -407,17 +405,21 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov,
407 mid->callback_data = cbdata; 405 mid->callback_data = cbdata;
408 mid->mid_state = MID_REQUEST_SUBMITTED; 406 mid->mid_state = MID_REQUEST_SUBMITTED;
409 407
408 /* put it on the pending_mid_q */
409 spin_lock(&GlobalMid_Lock);
410 list_add_tail(&mid->qhead, &server->pending_mid_q);
411 spin_unlock(&GlobalMid_Lock);
412
413
410 cifs_in_send_inc(server); 414 cifs_in_send_inc(server);
411 rc = smb_sendv(server, iov, nvec); 415 rc = smb_sendv(server, iov, nvec);
412 cifs_in_send_dec(server); 416 cifs_in_send_dec(server);
413 cifs_save_when_sent(mid); 417 cifs_save_when_sent(mid);
414 mutex_unlock(&server->srv_mutex); 418 mutex_unlock(&server->srv_mutex);
415 419
416 if (rc) 420 if (rc == 0)
417 goto out_err; 421 return 0;
418 422
419 return rc;
420out_err:
421 delete_mid(mid); 423 delete_mid(mid);
422 add_credits(server, 1); 424 add_credits(server, 1);
423 wake_up(&server->request_q); 425 wake_up(&server->request_q);
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 690157876184..958ae0e0ff8c 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -89,17 +89,13 @@ int coda_cache_check(struct inode *inode, int mask)
89/* this won't do any harm: just flag all children */ 89/* this won't do any harm: just flag all children */
90static void coda_flag_children(struct dentry *parent, int flag) 90static void coda_flag_children(struct dentry *parent, int flag)
91{ 91{
92 struct list_head *child;
93 struct dentry *de; 92 struct dentry *de;
94 93
95 spin_lock(&parent->d_lock); 94 spin_lock(&parent->d_lock);
96 list_for_each(child, &parent->d_subdirs) 95 list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) {
97 {
98 de = list_entry(child, struct dentry, d_u.d_child);
99 /* don't know what to do with negative dentries */ 96 /* don't know what to do with negative dentries */
100 if ( ! de->d_inode ) 97 if (de->d_inode )
101 continue; 98 coda_flag_inode(de->d_inode, flag);
102 coda_flag_inode(de->d_inode, flag);
103 } 99 }
104 spin_unlock(&parent->d_lock); 100 spin_unlock(&parent->d_lock);
105 return; 101 return;
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 177515829062..49fe52d25600 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -30,8 +30,8 @@
30#include "coda_int.h" 30#include "coda_int.h"
31 31
32/* dir inode-ops */ 32/* dir inode-ops */
33static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd); 33static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, bool excl);
34static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); 34static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags);
35static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 35static int coda_link(struct dentry *old_dentry, struct inode *dir_inode,
36 struct dentry *entry); 36 struct dentry *entry);
37static int coda_unlink(struct inode *dir_inode, struct dentry *entry); 37static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
@@ -46,7 +46,7 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry,
46static int coda_readdir(struct file *file, void *buf, filldir_t filldir); 46static int coda_readdir(struct file *file, void *buf, filldir_t filldir);
47 47
48/* dentry ops */ 48/* dentry ops */
49static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); 49static int coda_dentry_revalidate(struct dentry *de, unsigned int flags);
50static int coda_dentry_delete(const struct dentry *); 50static int coda_dentry_delete(const struct dentry *);
51 51
52/* support routines */ 52/* support routines */
@@ -94,7 +94,7 @@ const struct file_operations coda_dir_operations = {
94 94
95/* inode operations for directories */ 95/* inode operations for directories */
96/* access routines: lookup, readlink, permission */ 96/* access routines: lookup, readlink, permission */
97static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) 97static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags)
98{ 98{
99 struct super_block *sb = dir->i_sb; 99 struct super_block *sb = dir->i_sb;
100 const char *name = entry->d_name.name; 100 const char *name = entry->d_name.name;
@@ -188,7 +188,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
188} 188}
189 189
190/* creation routines: create, mknod, mkdir, link, symlink */ 190/* creation routines: create, mknod, mkdir, link, symlink */
191static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd) 191static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool excl)
192{ 192{
193 int error; 193 int error;
194 const char *name=de->d_name.name; 194 const char *name=de->d_name.name;
@@ -536,12 +536,12 @@ out:
536} 536}
537 537
538/* called when a cache lookup succeeds */ 538/* called when a cache lookup succeeds */
539static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) 539static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
540{ 540{
541 struct inode *inode; 541 struct inode *inode;
542 struct coda_inode_info *cii; 542 struct coda_inode_info *cii;
543 543
544 if (nd->flags & LOOKUP_RCU) 544 if (flags & LOOKUP_RCU)
545 return -ECHILD; 545 return -ECHILD;
546 546
547 inode = de->d_inode; 547 inode = de->d_inode;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7e6c52d8a207..7414ae24a79b 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
442 442
443static struct dentry * configfs_lookup(struct inode *dir, 443static struct dentry * configfs_lookup(struct inode *dir,
444 struct dentry *dentry, 444 struct dentry *dentry,
445 struct nameidata *nd) 445 unsigned int flags)
446{ 446{
447 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 447 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
448 struct configfs_dirent * sd; 448 struct configfs_dirent * sd;
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index d013c46402ed..28cca01ca9c9 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -417,7 +417,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
417/* 417/*
418 * Lookup and fill in the inode data.. 418 * Lookup and fill in the inode data..
419 */ 419 */
420static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 420static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
421{ 421{
422 unsigned int offset = 0; 422 unsigned int offset = 0;
423 struct inode *inode = NULL; 423 struct inode *inode = NULL;
diff --git a/fs/dcache.c b/fs/dcache.c
index 40469044088d..8086636bf796 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -218,7 +218,7 @@ static void __d_free(struct rcu_head *head)
218{ 218{
219 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 219 struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
220 220
221 WARN_ON(!list_empty(&dentry->d_alias)); 221 WARN_ON(!hlist_unhashed(&dentry->d_alias));
222 if (dname_external(dentry)) 222 if (dname_external(dentry))
223 kfree(dentry->d_name.name); 223 kfree(dentry->d_name.name);
224 kmem_cache_free(dentry_cache, dentry); 224 kmem_cache_free(dentry_cache, dentry);
@@ -267,7 +267,7 @@ static void dentry_iput(struct dentry * dentry)
267 struct inode *inode = dentry->d_inode; 267 struct inode *inode = dentry->d_inode;
268 if (inode) { 268 if (inode) {
269 dentry->d_inode = NULL; 269 dentry->d_inode = NULL;
270 list_del_init(&dentry->d_alias); 270 hlist_del_init(&dentry->d_alias);
271 spin_unlock(&dentry->d_lock); 271 spin_unlock(&dentry->d_lock);
272 spin_unlock(&inode->i_lock); 272 spin_unlock(&inode->i_lock);
273 if (!inode->i_nlink) 273 if (!inode->i_nlink)
@@ -291,7 +291,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
291{ 291{
292 struct inode *inode = dentry->d_inode; 292 struct inode *inode = dentry->d_inode;
293 dentry->d_inode = NULL; 293 dentry->d_inode = NULL;
294 list_del_init(&dentry->d_alias); 294 hlist_del_init(&dentry->d_alias);
295 dentry_rcuwalk_barrier(dentry); 295 dentry_rcuwalk_barrier(dentry);
296 spin_unlock(&dentry->d_lock); 296 spin_unlock(&dentry->d_lock);
297 spin_unlock(&inode->i_lock); 297 spin_unlock(&inode->i_lock);
@@ -699,10 +699,11 @@ EXPORT_SYMBOL(dget_parent);
699static struct dentry *__d_find_alias(struct inode *inode, int want_discon) 699static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
700{ 700{
701 struct dentry *alias, *discon_alias; 701 struct dentry *alias, *discon_alias;
702 struct hlist_node *p;
702 703
703again: 704again:
704 discon_alias = NULL; 705 discon_alias = NULL;
705 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 706 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
706 spin_lock(&alias->d_lock); 707 spin_lock(&alias->d_lock);
707 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { 708 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
708 if (IS_ROOT(alias) && 709 if (IS_ROOT(alias) &&
@@ -737,7 +738,7 @@ struct dentry *d_find_alias(struct inode *inode)
737{ 738{
738 struct dentry *de = NULL; 739 struct dentry *de = NULL;
739 740
740 if (!list_empty(&inode->i_dentry)) { 741 if (!hlist_empty(&inode->i_dentry)) {
741 spin_lock(&inode->i_lock); 742 spin_lock(&inode->i_lock);
742 de = __d_find_alias(inode, 0); 743 de = __d_find_alias(inode, 0);
743 spin_unlock(&inode->i_lock); 744 spin_unlock(&inode->i_lock);
@@ -753,9 +754,10 @@ EXPORT_SYMBOL(d_find_alias);
753void d_prune_aliases(struct inode *inode) 754void d_prune_aliases(struct inode *inode)
754{ 755{
755 struct dentry *dentry; 756 struct dentry *dentry;
757 struct hlist_node *p;
756restart: 758restart:
757 spin_lock(&inode->i_lock); 759 spin_lock(&inode->i_lock);
758 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 760 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
759 spin_lock(&dentry->d_lock); 761 spin_lock(&dentry->d_lock);
760 if (!dentry->d_count) { 762 if (!dentry->d_count) {
761 __dget_dlock(dentry); 763 __dget_dlock(dentry);
@@ -977,7 +979,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
977 inode = dentry->d_inode; 979 inode = dentry->d_inode;
978 if (inode) { 980 if (inode) {
979 dentry->d_inode = NULL; 981 dentry->d_inode = NULL;
980 list_del_init(&dentry->d_alias); 982 hlist_del_init(&dentry->d_alias);
981 if (dentry->d_op && dentry->d_op->d_iput) 983 if (dentry->d_op && dentry->d_op->d_iput)
982 dentry->d_op->d_iput(dentry, inode); 984 dentry->d_op->d_iput(dentry, inode);
983 else 985 else
@@ -1312,7 +1314,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1312 INIT_HLIST_BL_NODE(&dentry->d_hash); 1314 INIT_HLIST_BL_NODE(&dentry->d_hash);
1313 INIT_LIST_HEAD(&dentry->d_lru); 1315 INIT_LIST_HEAD(&dentry->d_lru);
1314 INIT_LIST_HEAD(&dentry->d_subdirs); 1316 INIT_LIST_HEAD(&dentry->d_subdirs);
1315 INIT_LIST_HEAD(&dentry->d_alias); 1317 INIT_HLIST_NODE(&dentry->d_alias);
1316 INIT_LIST_HEAD(&dentry->d_u.d_child); 1318 INIT_LIST_HEAD(&dentry->d_u.d_child);
1317 d_set_d_op(dentry, dentry->d_sb->s_d_op); 1319 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1318 1320
@@ -1400,7 +1402,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1400 if (inode) { 1402 if (inode) {
1401 if (unlikely(IS_AUTOMOUNT(inode))) 1403 if (unlikely(IS_AUTOMOUNT(inode)))
1402 dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; 1404 dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
1403 list_add(&dentry->d_alias, &inode->i_dentry); 1405 hlist_add_head(&dentry->d_alias, &inode->i_dentry);
1404 } 1406 }
1405 dentry->d_inode = inode; 1407 dentry->d_inode = inode;
1406 dentry_rcuwalk_barrier(dentry); 1408 dentry_rcuwalk_barrier(dentry);
@@ -1425,7 +1427,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
1425 1427
1426void d_instantiate(struct dentry *entry, struct inode * inode) 1428void d_instantiate(struct dentry *entry, struct inode * inode)
1427{ 1429{
1428 BUG_ON(!list_empty(&entry->d_alias)); 1430 BUG_ON(!hlist_unhashed(&entry->d_alias));
1429 if (inode) 1431 if (inode)
1430 spin_lock(&inode->i_lock); 1432 spin_lock(&inode->i_lock);
1431 __d_instantiate(entry, inode); 1433 __d_instantiate(entry, inode);
@@ -1458,13 +1460,14 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1458 int len = entry->d_name.len; 1460 int len = entry->d_name.len;
1459 const char *name = entry->d_name.name; 1461 const char *name = entry->d_name.name;
1460 unsigned int hash = entry->d_name.hash; 1462 unsigned int hash = entry->d_name.hash;
1463 struct hlist_node *p;
1461 1464
1462 if (!inode) { 1465 if (!inode) {
1463 __d_instantiate(entry, NULL); 1466 __d_instantiate(entry, NULL);
1464 return NULL; 1467 return NULL;
1465 } 1468 }
1466 1469
1467 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 1470 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
1468 /* 1471 /*
1469 * Don't need alias->d_lock here, because aliases with 1472 * Don't need alias->d_lock here, because aliases with
1470 * d_parent == entry->d_parent are not subject to name or 1473 * d_parent == entry->d_parent are not subject to name or
@@ -1490,7 +1493,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
1490{ 1493{
1491 struct dentry *result; 1494 struct dentry *result;
1492 1495
1493 BUG_ON(!list_empty(&entry->d_alias)); 1496 BUG_ON(!hlist_unhashed(&entry->d_alias));
1494 1497
1495 if (inode) 1498 if (inode)
1496 spin_lock(&inode->i_lock); 1499 spin_lock(&inode->i_lock);
@@ -1531,9 +1534,9 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
1531{ 1534{
1532 struct dentry *alias; 1535 struct dentry *alias;
1533 1536
1534 if (list_empty(&inode->i_dentry)) 1537 if (hlist_empty(&inode->i_dentry))
1535 return NULL; 1538 return NULL;
1536 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias); 1539 alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
1537 __dget(alias); 1540 __dget(alias);
1538 return alias; 1541 return alias;
1539} 1542}
@@ -1607,7 +1610,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1607 spin_lock(&tmp->d_lock); 1610 spin_lock(&tmp->d_lock);
1608 tmp->d_inode = inode; 1611 tmp->d_inode = inode;
1609 tmp->d_flags |= DCACHE_DISCONNECTED; 1612 tmp->d_flags |= DCACHE_DISCONNECTED;
1610 list_add(&tmp->d_alias, &inode->i_dentry); 1613 hlist_add_head(&tmp->d_alias, &inode->i_dentry);
1611 hlist_bl_lock(&tmp->d_sb->s_anon); 1614 hlist_bl_lock(&tmp->d_sb->s_anon);
1612 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); 1615 hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
1613 hlist_bl_unlock(&tmp->d_sb->s_anon); 1616 hlist_bl_unlock(&tmp->d_sb->s_anon);
@@ -2384,14 +2387,13 @@ static struct dentry *__d_unalias(struct inode *inode,
2384 struct dentry *dentry, struct dentry *alias) 2387 struct dentry *dentry, struct dentry *alias)
2385{ 2388{
2386 struct mutex *m1 = NULL, *m2 = NULL; 2389 struct mutex *m1 = NULL, *m2 = NULL;
2387 struct dentry *ret; 2390 struct dentry *ret = ERR_PTR(-EBUSY);
2388 2391
2389 /* If alias and dentry share a parent, then no extra locks required */ 2392 /* If alias and dentry share a parent, then no extra locks required */
2390 if (alias->d_parent == dentry->d_parent) 2393 if (alias->d_parent == dentry->d_parent)
2391 goto out_unalias; 2394 goto out_unalias;
2392 2395
2393 /* See lock_rename() */ 2396 /* See lock_rename() */
2394 ret = ERR_PTR(-EBUSY);
2395 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) 2397 if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
2396 goto out_err; 2398 goto out_err;
2397 m1 = &dentry->d_sb->s_vfs_rename_mutex; 2399 m1 = &dentry->d_sb->s_vfs_rename_mutex;
@@ -2399,8 +2401,10 @@ static struct dentry *__d_unalias(struct inode *inode,
2399 goto out_err; 2401 goto out_err;
2400 m2 = &alias->d_parent->d_inode->i_mutex; 2402 m2 = &alias->d_parent->d_inode->i_mutex;
2401out_unalias: 2403out_unalias:
2402 __d_move(alias, dentry); 2404 if (likely(!d_mountpoint(alias))) {
2403 ret = alias; 2405 __d_move(alias, dentry);
2406 ret = alias;
2407 }
2404out_err: 2408out_err:
2405 spin_unlock(&inode->i_lock); 2409 spin_unlock(&inode->i_lock);
2406 if (m2) 2410 if (m2)
@@ -2622,7 +2626,7 @@ global_root:
2622 if (!slash) 2626 if (!slash)
2623 error = prepend(buffer, buflen, "/", 1); 2627 error = prepend(buffer, buflen, "/", 1);
2624 if (!error) 2628 if (!error)
2625 error = real_mount(vfsmnt)->mnt_ns ? 1 : 2; 2629 error = is_mounted(vfsmnt) ? 1 : 2;
2626 goto out; 2630 goto out;
2627} 2631}
2628 2632
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b80bc846a15a..d17c20fd74e6 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -54,13 +54,12 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev
54 break; 54 break;
55 case S_IFLNK: 55 case S_IFLNK:
56 inode->i_op = &debugfs_link_operations; 56 inode->i_op = &debugfs_link_operations;
57 inode->i_fop = fops;
58 inode->i_private = data; 57 inode->i_private = data;
59 break; 58 break;
60 case S_IFDIR: 59 case S_IFDIR:
61 inode->i_op = &simple_dir_inode_operations; 60 inode->i_op = &simple_dir_inode_operations;
62 inode->i_fop = fops ? fops : &simple_dir_operations; 61 inode->i_fop = &simple_dir_operations;
63 inode->i_private = data; 62 inode->i_private = NULL;
64 63
65 /* directory inodes start off with i_nlink == 2 64 /* directory inodes start off with i_nlink == 2
66 * (for "." entry) */ 65 * (for "." entry) */
@@ -91,13 +90,12 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
91 return error; 90 return error;
92} 91}
93 92
94static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, 93static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
95 void *data, const struct file_operations *fops)
96{ 94{
97 int res; 95 int res;
98 96
99 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; 97 mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR;
100 res = debugfs_mknod(dir, dentry, mode, 0, data, fops); 98 res = debugfs_mknod(dir, dentry, mode, 0, NULL, NULL);
101 if (!res) { 99 if (!res) {
102 inc_nlink(dir); 100 inc_nlink(dir);
103 fsnotify_mkdir(dir, dentry); 101 fsnotify_mkdir(dir, dentry);
@@ -106,10 +104,10 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
106} 104}
107 105
108static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode, 106static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode,
109 void *data, const struct file_operations *fops) 107 void *data)
110{ 108{
111 mode = (mode & S_IALLUGO) | S_IFLNK; 109 mode = (mode & S_IALLUGO) | S_IFLNK;
112 return debugfs_mknod(dir, dentry, mode, 0, data, fops); 110 return debugfs_mknod(dir, dentry, mode, 0, data, NULL);
113} 111}
114 112
115static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 113static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
@@ -293,13 +291,19 @@ static struct file_system_type debug_fs_type = {
293 .kill_sb = kill_litter_super, 291 .kill_sb = kill_litter_super,
294}; 292};
295 293
296static int debugfs_create_by_name(const char *name, umode_t mode, 294struct dentry *__create_file(const char *name, umode_t mode,
297 struct dentry *parent, 295 struct dentry *parent, void *data,
298 struct dentry **dentry, 296 const struct file_operations *fops)
299 void *data,
300 const struct file_operations *fops)
301{ 297{
302 int error = 0; 298 struct dentry *dentry = NULL;
299 int error;
300
301 pr_debug("debugfs: creating file '%s'\n",name);
302
303 error = simple_pin_fs(&debug_fs_type, &debugfs_mount,
304 &debugfs_mount_count);
305 if (error)
306 goto exit;
303 307
304 /* If the parent is not specified, we create it in the root. 308 /* If the parent is not specified, we create it in the root.
305 * We need the root dentry to do this, which is in the super 309 * We need the root dentry to do this, which is in the super
@@ -309,30 +313,35 @@ static int debugfs_create_by_name(const char *name, umode_t mode,
309 if (!parent) 313 if (!parent)
310 parent = debugfs_mount->mnt_root; 314 parent = debugfs_mount->mnt_root;
311 315
312 *dentry = NULL; 316 dentry = NULL;
313 mutex_lock(&parent->d_inode->i_mutex); 317 mutex_lock(&parent->d_inode->i_mutex);
314 *dentry = lookup_one_len(name, parent, strlen(name)); 318 dentry = lookup_one_len(name, parent, strlen(name));
315 if (!IS_ERR(*dentry)) { 319 if (!IS_ERR(dentry)) {
316 switch (mode & S_IFMT) { 320 switch (mode & S_IFMT) {
317 case S_IFDIR: 321 case S_IFDIR:
318 error = debugfs_mkdir(parent->d_inode, *dentry, mode, 322 error = debugfs_mkdir(parent->d_inode, dentry, mode);
319 data, fops); 323
320 break; 324 break;
321 case S_IFLNK: 325 case S_IFLNK:
322 error = debugfs_link(parent->d_inode, *dentry, mode, 326 error = debugfs_link(parent->d_inode, dentry, mode,
323 data, fops); 327 data);
324 break; 328 break;
325 default: 329 default:
326 error = debugfs_create(parent->d_inode, *dentry, mode, 330 error = debugfs_create(parent->d_inode, dentry, mode,
327 data, fops); 331 data, fops);
328 break; 332 break;
329 } 333 }
330 dput(*dentry); 334 dput(dentry);
331 } else 335 } else
332 error = PTR_ERR(*dentry); 336 error = PTR_ERR(dentry);
333 mutex_unlock(&parent->d_inode->i_mutex); 337 mutex_unlock(&parent->d_inode->i_mutex);
334 338
335 return error; 339 if (error) {
340 dentry = NULL;
341 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
342 }
343exit:
344 return dentry;
336} 345}
337 346
338/** 347/**
@@ -365,25 +374,15 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
365 struct dentry *parent, void *data, 374 struct dentry *parent, void *data,
366 const struct file_operations *fops) 375 const struct file_operations *fops)
367{ 376{
368 struct dentry *dentry = NULL; 377 switch (mode & S_IFMT) {
369 int error; 378 case S_IFREG:
370 379 case 0:
371 pr_debug("debugfs: creating file '%s'\n",name); 380 break;
372 381 default:
373 error = simple_pin_fs(&debug_fs_type, &debugfs_mount, 382 BUG();
374 &debugfs_mount_count);
375 if (error)
376 goto exit;
377
378 error = debugfs_create_by_name(name, mode, parent, &dentry,
379 data, fops);
380 if (error) {
381 dentry = NULL;
382 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
383 goto exit;
384 } 383 }
385exit: 384
386 return dentry; 385 return __create_file(name, mode, parent, data, fops);
387} 386}
388EXPORT_SYMBOL_GPL(debugfs_create_file); 387EXPORT_SYMBOL_GPL(debugfs_create_file);
389 388
@@ -407,8 +406,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
407 */ 406 */
408struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) 407struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
409{ 408{
410 return debugfs_create_file(name, 409 return __create_file(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
411 S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
412 parent, NULL, NULL); 410 parent, NULL, NULL);
413} 411}
414EXPORT_SYMBOL_GPL(debugfs_create_dir); 412EXPORT_SYMBOL_GPL(debugfs_create_dir);
@@ -446,8 +444,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
446 if (!link) 444 if (!link)
447 return NULL; 445 return NULL;
448 446
449 result = debugfs_create_file(name, S_IFLNK | S_IRWXUGO, parent, link, 447 result = __create_file(name, S_IFLNK | S_IRWXUGO, parent, link, NULL);
450 NULL);
451 if (!result) 448 if (!result)
452 kfree(link); 449 kfree(link);
453 return result; 450 return result;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 979c1e309c73..14afbabe6546 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -439,15 +439,15 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type,
439 return ERR_PTR(error); 439 return ERR_PTR(error);
440 440
441 if (opts.newinstance) 441 if (opts.newinstance)
442 s = sget(fs_type, NULL, set_anon_super, NULL); 442 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
443 else 443 else
444 s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); 444 s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags,
445 NULL);
445 446
446 if (IS_ERR(s)) 447 if (IS_ERR(s))
447 return ERR_CAST(s); 448 return ERR_CAST(s);
448 449
449 if (!s->s_root) { 450 if (!s->s_root) {
450 s->s_flags = flags;
451 error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0); 451 error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
452 if (error) 452 if (error)
453 goto out_undo_sget; 453 goto out_undo_sget;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0c85fae37666..1faf4cb56f39 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1258,7 +1258,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1258 */ 1258 */
1259 BUG_ON(retval == -EIOCBQUEUED); 1259 BUG_ON(retval == -EIOCBQUEUED);
1260 if (dio->is_async && retval == 0 && dio->result && 1260 if (dio->is_async && retval == 0 && dio->result &&
1261 ((rw & READ) || (dio->result == sdio.size))) 1261 ((rw == READ) || (dio->result == sdio.size)))
1262 retval = -EIOCBQUEUED; 1262 retval = -EIOCBQUEUED;
1263 1263
1264 if (retval != -EIOCBQUEUED) 1264 if (retval != -EIOCBQUEUED)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 534c1d46e69e..1b5d9af937df 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -32,7 +32,7 @@
32/** 32/**
33 * ecryptfs_d_revalidate - revalidate an ecryptfs dentry 33 * ecryptfs_d_revalidate - revalidate an ecryptfs dentry
34 * @dentry: The ecryptfs dentry 34 * @dentry: The ecryptfs dentry
35 * @nd: The associated nameidata 35 * @flags: lookup flags
36 * 36 *
37 * Called when the VFS needs to revalidate a dentry. This 37 * Called when the VFS needs to revalidate a dentry. This
38 * is called whenever a name lookup finds a dentry in the 38 * is called whenever a name lookup finds a dentry in the
@@ -42,32 +42,20 @@
42 * Returns 1 if valid, 0 otherwise. 42 * Returns 1 if valid, 0 otherwise.
43 * 43 *
44 */ 44 */
45static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) 45static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
46{ 46{
47 struct dentry *lower_dentry; 47 struct dentry *lower_dentry;
48 struct vfsmount *lower_mnt; 48 struct vfsmount *lower_mnt;
49 struct dentry *dentry_save = NULL;
50 struct vfsmount *vfsmount_save = NULL;
51 int rc = 1; 49 int rc = 1;
52 50
53 if (nd && nd->flags & LOOKUP_RCU) 51 if (flags & LOOKUP_RCU)
54 return -ECHILD; 52 return -ECHILD;
55 53
56 lower_dentry = ecryptfs_dentry_to_lower(dentry); 54 lower_dentry = ecryptfs_dentry_to_lower(dentry);
57 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 55 lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
58 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) 56 if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate)
59 goto out; 57 goto out;
60 if (nd) { 58 rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
61 dentry_save = nd->path.dentry;
62 vfsmount_save = nd->path.mnt;
63 nd->path.dentry = lower_dentry;
64 nd->path.mnt = lower_mnt;
65 }
66 rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd);
67 if (nd) {
68 nd->path.dentry = dentry_save;
69 nd->path.mnt = vfsmount_save;
70 }
71 if (dentry->d_inode) { 59 if (dentry->d_inode) {
72 struct inode *lower_inode = 60 struct inode *lower_inode =
73 ecryptfs_inode_to_lower(dentry->d_inode); 61 ecryptfs_inode_to_lower(dentry->d_inode);
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 867b64c5d84f..989e034f02bd 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -550,20 +550,6 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
550extern struct kmem_cache *ecryptfs_key_sig_cache; 550extern struct kmem_cache *ecryptfs_key_sig_cache;
551extern struct kmem_cache *ecryptfs_global_auth_tok_cache; 551extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
552extern struct kmem_cache *ecryptfs_key_tfm_cache; 552extern struct kmem_cache *ecryptfs_key_tfm_cache;
553extern struct kmem_cache *ecryptfs_open_req_cache;
554
555struct ecryptfs_open_req {
556#define ECRYPTFS_REQ_PROCESSED 0x00000001
557#define ECRYPTFS_REQ_DROPPED 0x00000002
558#define ECRYPTFS_REQ_ZOMBIE 0x00000004
559 u32 flags;
560 struct file **lower_file;
561 struct dentry *lower_dentry;
562 struct vfsmount *lower_mnt;
563 wait_queue_head_t wait;
564 struct mutex mux;
565 struct list_head kthread_ctl_list;
566};
567 553
568struct inode *ecryptfs_get_inode(struct inode *lower_inode, 554struct inode *ecryptfs_get_inode(struct inode *lower_inode,
569 struct super_block *sb); 555 struct super_block *sb);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index a07441a0a878..ffa2be57804d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -173,7 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode,
173 inode = ERR_CAST(lower_dir_dentry); 173 inode = ERR_CAST(lower_dir_dentry);
174 goto out; 174 goto out;
175 } 175 }
176 rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL); 176 rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true);
177 if (rc) { 177 if (rc) {
178 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 178 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
179 "rc = [%d]\n", __func__, rc); 179 "rc = [%d]\n", __func__, rc);
@@ -240,7 +240,6 @@ out:
240 * @dir: The inode of the directory in which to create the file. 240 * @dir: The inode of the directory in which to create the file.
241 * @dentry: The eCryptfs dentry 241 * @dentry: The eCryptfs dentry
242 * @mode: The mode of the new file. 242 * @mode: The mode of the new file.
243 * @nd: nameidata
244 * 243 *
245 * Creates a new file. 244 * Creates a new file.
246 * 245 *
@@ -248,7 +247,7 @@ out:
248 */ 247 */
249static int 248static int
250ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, 249ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
251 umode_t mode, struct nameidata *nd) 250 umode_t mode, bool excl)
252{ 251{
253 struct inode *ecryptfs_inode; 252 struct inode *ecryptfs_inode;
254 int rc; 253 int rc;
@@ -270,8 +269,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
270 iput(ecryptfs_inode); 269 iput(ecryptfs_inode);
271 goto out; 270 goto out;
272 } 271 }
273 d_instantiate(ecryptfs_dentry, ecryptfs_inode);
274 unlock_new_inode(ecryptfs_inode); 272 unlock_new_inode(ecryptfs_inode);
273 d_instantiate(ecryptfs_dentry, ecryptfs_inode);
275out: 274out:
276 return rc; 275 return rc;
277} 276}
@@ -374,7 +373,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
374 */ 373 */
375static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, 374static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
376 struct dentry *ecryptfs_dentry, 375 struct dentry *ecryptfs_dentry,
377 struct nameidata *ecryptfs_nd) 376 unsigned int flags)
378{ 377{
379 char *encrypted_and_encoded_name = NULL; 378 char *encrypted_and_encoded_name = NULL;
380 size_t encrypted_and_encoded_name_size; 379 size_t encrypted_and_encoded_name_size;
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
index 0dbe58a8b172..809e67d05ca3 100644
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -27,7 +27,12 @@
27#include <linux/mount.h> 27#include <linux/mount.h>
28#include "ecryptfs_kernel.h" 28#include "ecryptfs_kernel.h"
29 29
30struct kmem_cache *ecryptfs_open_req_cache; 30struct ecryptfs_open_req {
31 struct file **lower_file;
32 struct path path;
33 struct completion done;
34 struct list_head kthread_ctl_list;
35};
31 36
32static struct ecryptfs_kthread_ctl { 37static struct ecryptfs_kthread_ctl {
33#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001 38#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
@@ -67,18 +72,10 @@ static int ecryptfs_threadfn(void *ignored)
67 req = list_first_entry(&ecryptfs_kthread_ctl.req_list, 72 req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
68 struct ecryptfs_open_req, 73 struct ecryptfs_open_req,
69 kthread_ctl_list); 74 kthread_ctl_list);
70 mutex_lock(&req->mux);
71 list_del(&req->kthread_ctl_list); 75 list_del(&req->kthread_ctl_list);
72 if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) { 76 *req->lower_file = dentry_open(&req->path,
73 dget(req->lower_dentry); 77 (O_RDWR | O_LARGEFILE), current_cred());
74 mntget(req->lower_mnt); 78 complete(&req->done);
75 (*req->lower_file) = dentry_open(
76 req->lower_dentry, req->lower_mnt,
77 (O_RDWR | O_LARGEFILE), current_cred());
78 req->flags |= ECRYPTFS_REQ_PROCESSED;
79 }
80 wake_up(&req->wait);
81 mutex_unlock(&req->mux);
82 } 79 }
83 mutex_unlock(&ecryptfs_kthread_ctl.mux); 80 mutex_unlock(&ecryptfs_kthread_ctl.mux);
84 } 81 }
@@ -111,10 +108,9 @@ void ecryptfs_destroy_kthread(void)
111 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE; 108 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
112 list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list, 109 list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
113 kthread_ctl_list) { 110 kthread_ctl_list) {
114 mutex_lock(&req->mux); 111 list_del(&req->kthread_ctl_list);
115 req->flags |= ECRYPTFS_REQ_ZOMBIE; 112 *req->lower_file = ERR_PTR(-EIO);
116 wake_up(&req->wait); 113 complete(&req->done);
117 mutex_unlock(&req->mux);
118 } 114 }
119 mutex_unlock(&ecryptfs_kthread_ctl.mux); 115 mutex_unlock(&ecryptfs_kthread_ctl.mux);
120 kthread_stop(ecryptfs_kthread); 116 kthread_stop(ecryptfs_kthread);
@@ -136,34 +132,26 @@ int ecryptfs_privileged_open(struct file **lower_file,
136 struct vfsmount *lower_mnt, 132 struct vfsmount *lower_mnt,
137 const struct cred *cred) 133 const struct cred *cred)
138{ 134{
139 struct ecryptfs_open_req *req; 135 struct ecryptfs_open_req req;
140 int flags = O_LARGEFILE; 136 int flags = O_LARGEFILE;
141 int rc = 0; 137 int rc = 0;
142 138
139 init_completion(&req.done);
140 req.lower_file = lower_file;
141 req.path.dentry = lower_dentry;
142 req.path.mnt = lower_mnt;
143
143 /* Corresponding dput() and mntput() are done when the 144 /* Corresponding dput() and mntput() are done when the
144 * lower file is fput() when all eCryptfs files for the inode are 145 * lower file is fput() when all eCryptfs files for the inode are
145 * released. */ 146 * released. */
146 dget(lower_dentry);
147 mntget(lower_mnt);
148 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; 147 flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
149 (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); 148 (*lower_file) = dentry_open(&req.path, flags, cred);
150 if (!IS_ERR(*lower_file)) 149 if (!IS_ERR(*lower_file))
151 goto out; 150 goto out;
152 if ((flags & O_ACCMODE) == O_RDONLY) { 151 if ((flags & O_ACCMODE) == O_RDONLY) {
153 rc = PTR_ERR((*lower_file)); 152 rc = PTR_ERR((*lower_file));
154 goto out; 153 goto out;
155 } 154 }
156 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
157 if (!req) {
158 rc = -ENOMEM;
159 goto out;
160 }
161 mutex_init(&req->mux);
162 req->lower_file = lower_file;
163 req->lower_dentry = lower_dentry;
164 req->lower_mnt = lower_mnt;
165 init_waitqueue_head(&req->wait);
166 req->flags = 0;
167 mutex_lock(&ecryptfs_kthread_ctl.mux); 155 mutex_lock(&ecryptfs_kthread_ctl.mux);
168 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { 156 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
169 rc = -EIO; 157 rc = -EIO;
@@ -171,27 +159,14 @@ int ecryptfs_privileged_open(struct file **lower_file,
171 printk(KERN_ERR "%s: We are in the middle of shutting down; " 159 printk(KERN_ERR "%s: We are in the middle of shutting down; "
172 "aborting privileged request to open lower file\n", 160 "aborting privileged request to open lower file\n",
173 __func__); 161 __func__);
174 goto out_free; 162 goto out;
175 } 163 }
176 list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list); 164 list_add_tail(&req.kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
177 mutex_unlock(&ecryptfs_kthread_ctl.mux); 165 mutex_unlock(&ecryptfs_kthread_ctl.mux);
178 wake_up(&ecryptfs_kthread_ctl.wait); 166 wake_up(&ecryptfs_kthread_ctl.wait);
179 wait_event(req->wait, (req->flags != 0)); 167 wait_for_completion(&req.done);
180 mutex_lock(&req->mux); 168 if (IS_ERR(*lower_file))
181 BUG_ON(req->flags == 0); 169 rc = PTR_ERR(*lower_file);
182 if (req->flags & ECRYPTFS_REQ_DROPPED
183 || req->flags & ECRYPTFS_REQ_ZOMBIE) {
184 rc = -EIO;
185 printk(KERN_WARNING "%s: Privileged open request dropped\n",
186 __func__);
187 goto out_unlock;
188 }
189 if (IS_ERR(*req->lower_file))
190 rc = PTR_ERR(*req->lower_file);
191out_unlock:
192 mutex_unlock(&req->mux);
193out_free:
194 kmem_cache_free(ecryptfs_open_req_cache, req);
195out: 170out:
196 return rc; 171 return rc;
197} 172}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 68954937a071..1c0b3b6b75c6 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -499,13 +499,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
499 goto out; 499 goto out;
500 } 500 }
501 501
502 s = sget(fs_type, NULL, set_anon_super, NULL); 502 s = sget(fs_type, NULL, set_anon_super, flags, NULL);
503 if (IS_ERR(s)) { 503 if (IS_ERR(s)) {
504 rc = PTR_ERR(s); 504 rc = PTR_ERR(s);
505 goto out; 505 goto out;
506 } 506 }
507 507
508 s->s_flags = flags;
509 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); 508 rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
510 if (rc) 509 if (rc)
511 goto out1; 510 goto out1;
@@ -682,11 +681,6 @@ static struct ecryptfs_cache_info {
682 .name = "ecryptfs_key_tfm_cache", 681 .name = "ecryptfs_key_tfm_cache",
683 .size = sizeof(struct ecryptfs_key_tfm), 682 .size = sizeof(struct ecryptfs_key_tfm),
684 }, 683 },
685 {
686 .cache = &ecryptfs_open_req_cache,
687 .name = "ecryptfs_open_req_cache",
688 .size = sizeof(struct ecryptfs_open_req),
689 },
690}; 684};
691 685
692static void ecryptfs_free_kmem_caches(void) 686static void ecryptfs_free_kmem_caches(void)
diff --git a/fs/efs/efs.h b/fs/efs/efs.h
index d8305b582ab0..5528926ac7f6 100644
--- a/fs/efs/efs.h
+++ b/fs/efs/efs.h
@@ -129,7 +129,7 @@ extern struct inode *efs_iget(struct super_block *, unsigned long);
129extern efs_block_t efs_map_block(struct inode *, efs_block_t); 129extern efs_block_t efs_map_block(struct inode *, efs_block_t);
130extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); 130extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int);
131 131
132extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); 132extern struct dentry *efs_lookup(struct inode *, struct dentry *, unsigned int);
133extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, 133extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid,
134 int fh_len, int fh_type); 134 int fh_len, int fh_type);
135extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, 135extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 832b10ded82f..96f66d213a19 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -58,7 +58,8 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
58 return(0); 58 return(0);
59} 59}
60 60
61struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { 61struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
62{
62 efs_ino_t inodenum; 63 efs_ino_t inodenum;
63 struct inode *inode = NULL; 64 struct inode *inode = NULL;
64 65
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 74598f67efeb..1c8b55670804 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1710 goto error_tgt_fput; 1710 goto error_tgt_fput;
1711 1711
1712 /* Check if EPOLLWAKEUP is allowed */ 1712 /* Check if EPOLLWAKEUP is allowed */
1713 if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) 1713 if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND))
1714 epds.events &= ~EPOLLWAKEUP; 1714 epds.events &= ~EPOLLWAKEUP;
1715 1715
1716 /* 1716 /*
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index fc7161d6bf6b..4731fd991efe 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -46,7 +46,7 @@ static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode)
46} 46}
47 47
48static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, 48static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
49 struct nameidata *nd) 49 unsigned int flags)
50{ 50{
51 struct inode *inode; 51 struct inode *inode;
52 ino_t ino; 52 ino_t ino;
@@ -60,7 +60,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
60} 60}
61 61
62static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 62static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
63 struct nameidata *nd) 63 bool excl)
64{ 64{
65 struct inode *inode = exofs_new_inode(dir, mode); 65 struct inode *inode = exofs_new_inode(dir, mode);
66 int err = PTR_ERR(inode); 66 int err = PTR_ERR(inode);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 49cf230554a2..24a49d47e935 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -735,13 +735,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)
735out: 735out:
736 ios->numdevs = devs_in_group; 736 ios->numdevs = devs_in_group;
737 ios->pages_consumed = cur_pg; 737 ios->pages_consumed = cur_pg;
738 if (unlikely(ret)) { 738 return ret;
739 if (length == ios->length)
740 return ret;
741 else
742 ios->length -= length;
743 }
744 return 0;
745} 739}
746 740
747int ore_create(struct ore_io_state *ios) 741int ore_create(struct ore_io_state *ios)
diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c
index d222c77cfa1b..5f376d14fdcc 100644
--- a/fs/exofs/ore_raid.c
+++ b/fs/exofs/ore_raid.c
@@ -144,26 +144,26 @@ static void _sp2d_reset(struct __stripe_pages_2d *sp2d,
144{ 144{
145 unsigned data_devs = sp2d->data_devs; 145 unsigned data_devs = sp2d->data_devs;
146 unsigned group_width = data_devs + sp2d->parity; 146 unsigned group_width = data_devs + sp2d->parity;
147 unsigned p; 147 int p, c;
148 148
149 if (!sp2d->needed) 149 if (!sp2d->needed)
150 return; 150 return;
151 151
152 for (p = 0; p < sp2d->pages_in_unit; p++) { 152 for (c = data_devs - 1; c >= 0; --c)
153 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 153 for (p = sp2d->pages_in_unit - 1; p >= 0; --p) {
154 154 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
155 if (_1ps->write_count < group_width) {
156 unsigned c;
157 155
158 for (c = 0; c < data_devs; c++) 156 if (_1ps->page_is_read[c]) {
159 if (_1ps->page_is_read[c]) { 157 struct page *page = _1ps->pages[c];
160 struct page *page = _1ps->pages[c];
161 158
162 r4w->put_page(priv, page); 159 r4w->put_page(priv, page);
163 _1ps->page_is_read[c] = false; 160 _1ps->page_is_read[c] = false;
164 } 161 }
165 } 162 }
166 163
164 for (p = 0; p < sp2d->pages_in_unit; p++) {
165 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
166
167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages)); 167 memset(_1ps->pages, 0, group_width * sizeof(*_1ps->pages));
168 _1ps->write_count = 0; 168 _1ps->write_count = 0;
169 _1ps->tx = NULL; 169 _1ps->tx = NULL;
@@ -461,16 +461,12 @@ static void _mark_read4write_pages_uptodate(struct ore_io_state *ios, int ret)
461 * ios->sp2d[p][*], xor is calculated the same way. These pages are 461 * ios->sp2d[p][*], xor is calculated the same way. These pages are
462 * allocated/freed and don't go through cache 462 * allocated/freed and don't go through cache
463 */ 463 */
464static int _read_4_write(struct ore_io_state *ios) 464static int _read_4_write_first_stripe(struct ore_io_state *ios)
465{ 465{
466 struct ore_io_state *ios_read;
467 struct ore_striping_info read_si; 466 struct ore_striping_info read_si;
468 struct __stripe_pages_2d *sp2d = ios->sp2d; 467 struct __stripe_pages_2d *sp2d = ios->sp2d;
469 u64 offset = ios->si.first_stripe_start; 468 u64 offset = ios->si.first_stripe_start;
470 u64 last_stripe_end; 469 unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
471 unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
472 unsigned i, c, p, min_p = sp2d->pages_in_unit, max_p = -1;
473 int ret;
474 470
475 if (offset == ios->offset) /* Go to start collect $200 */ 471 if (offset == ios->offset) /* Go to start collect $200 */
476 goto read_last_stripe; 472 goto read_last_stripe;
@@ -478,6 +474,9 @@ static int _read_4_write(struct ore_io_state *ios)
478 min_p = _sp2d_min_pg(sp2d); 474 min_p = _sp2d_min_pg(sp2d);
479 max_p = _sp2d_max_pg(sp2d); 475 max_p = _sp2d_max_pg(sp2d);
480 476
477 ORE_DBGMSG("stripe_start=0x%llx ios->offset=0x%llx min_p=%d max_p=%d\n",
478 offset, ios->offset, min_p, max_p);
479
481 for (c = 0; ; c++) { 480 for (c = 0; ; c++) {
482 ore_calc_stripe_info(ios->layout, offset, 0, &read_si); 481 ore_calc_stripe_info(ios->layout, offset, 0, &read_si);
483 read_si.obj_offset += min_p * PAGE_SIZE; 482 read_si.obj_offset += min_p * PAGE_SIZE;
@@ -512,6 +511,18 @@ static int _read_4_write(struct ore_io_state *ios)
512 } 511 }
513 512
514read_last_stripe: 513read_last_stripe:
514 return 0;
515}
516
517static int _read_4_write_last_stripe(struct ore_io_state *ios)
518{
519 struct ore_striping_info read_si;
520 struct __stripe_pages_2d *sp2d = ios->sp2d;
521 u64 offset;
522 u64 last_stripe_end;
523 unsigned bytes_in_stripe = ios->si.bytes_in_stripe;
524 unsigned c, p, min_p = sp2d->pages_in_unit, max_p = -1;
525
515 offset = ios->offset + ios->length; 526 offset = ios->offset + ios->length;
516 if (offset % PAGE_SIZE) 527 if (offset % PAGE_SIZE)
517 _add_to_r4w_last_page(ios, &offset); 528 _add_to_r4w_last_page(ios, &offset);
@@ -527,15 +538,15 @@ read_last_stripe:
527 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, 538 c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1,
528 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); 539 ios->layout->mirrors_p1, read_si.par_dev, read_si.dev);
529 540
530 BUG_ON(ios->si.first_stripe_start + bytes_in_stripe != last_stripe_end);
531 /* unaligned IO must be within a single stripe */
532
533 if (min_p == sp2d->pages_in_unit) { 541 if (min_p == sp2d->pages_in_unit) {
534 /* Didn't do it yet */ 542 /* Didn't do it yet */
535 min_p = _sp2d_min_pg(sp2d); 543 min_p = _sp2d_min_pg(sp2d);
536 max_p = _sp2d_max_pg(sp2d); 544 max_p = _sp2d_max_pg(sp2d);
537 } 545 }
538 546
547 ORE_DBGMSG("offset=0x%llx stripe_end=0x%llx min_p=%d max_p=%d\n",
548 offset, last_stripe_end, min_p, max_p);
549
539 while (offset < last_stripe_end) { 550 while (offset < last_stripe_end) {
540 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p]; 551 struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];
541 552
@@ -568,6 +579,15 @@ read_last_stripe:
568 } 579 }
569 580
570read_it: 581read_it:
582 return 0;
583}
584
585static int _read_4_write_execute(struct ore_io_state *ios)
586{
587 struct ore_io_state *ios_read;
588 unsigned i;
589 int ret;
590
571 ios_read = ios->ios_read_4_write; 591 ios_read = ios->ios_read_4_write;
572 if (!ios_read) 592 if (!ios_read)
573 return 0; 593 return 0;
@@ -591,6 +611,8 @@ read_it:
591 } 611 }
592 612
593 _mark_read4write_pages_uptodate(ios_read, ret); 613 _mark_read4write_pages_uptodate(ios_read, ret);
614 ore_put_io_state(ios_read);
615 ios->ios_read_4_write = NULL; /* Might need a reuse at last stripe */
594 return 0; 616 return 0;
595} 617}
596 618
@@ -626,8 +648,11 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
626 /* If first stripe, Read in all read4write pages 648 /* If first stripe, Read in all read4write pages
627 * (if needed) before we calculate the first parity. 649 * (if needed) before we calculate the first parity.
628 */ 650 */
629 _read_4_write(ios); 651 _read_4_write_first_stripe(ios);
630 } 652 }
653 if (!cur_len) /* If last stripe r4w pages of last stripe */
654 _read_4_write_last_stripe(ios);
655 _read_4_write_execute(ios);
631 656
632 for (i = 0; i < num_pages; i++) { 657 for (i = 0; i < num_pages; i++) {
633 pages[i] = _raid_page_alloc(); 658 pages[i] = _raid_page_alloc();
@@ -654,34 +679,14 @@ int _ore_add_parity_unit(struct ore_io_state *ios,
654 679
655int _ore_post_alloc_raid_stuff(struct ore_io_state *ios) 680int _ore_post_alloc_raid_stuff(struct ore_io_state *ios)
656{ 681{
657 struct ore_layout *layout = ios->layout;
658
659 if (ios->parity_pages) { 682 if (ios->parity_pages) {
683 struct ore_layout *layout = ios->layout;
660 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE; 684 unsigned pages_in_unit = layout->stripe_unit / PAGE_SIZE;
661 unsigned stripe_size = ios->si.bytes_in_stripe;
662 u64 last_stripe, first_stripe;
663 685
664 if (_sp2d_alloc(pages_in_unit, layout->group_width, 686 if (_sp2d_alloc(pages_in_unit, layout->group_width,
665 layout->parity, &ios->sp2d)) { 687 layout->parity, &ios->sp2d)) {
666 return -ENOMEM; 688 return -ENOMEM;
667 } 689 }
668
669 /* Round io down to last full strip */
670 first_stripe = div_u64(ios->offset, stripe_size);
671 last_stripe = div_u64(ios->offset + ios->length, stripe_size);
672
673 /* If an IO spans more then a single stripe it must end at
674 * a stripe boundary. The reminder at the end is pushed into the
675 * next IO.
676 */
677 if (last_stripe != first_stripe) {
678 ios->length = last_stripe * stripe_size - ios->offset;
679
680 BUG_ON(!ios->length);
681 ios->nr_pages = (ios->length + PAGE_SIZE - 1) /
682 PAGE_SIZE;
683 ios->si.length = ios->length; /*make it consistent */
684 }
685 } 690 }
686 return 0; 691 return 0;
687} 692}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b0201ca6e9c6..29ab099e3e08 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -19,19 +19,19 @@
19#define dprintk(fmt, args...) do{}while(0) 19#define dprintk(fmt, args...) do{}while(0)
20 20
21 21
22static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, 22static int get_name(const struct path *path, char *name, struct dentry *child);
23 struct dentry *child);
24 23
25 24
26static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, 25static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir,
27 char *name, struct dentry *child) 26 char *name, struct dentry *child)
28{ 27{
29 const struct export_operations *nop = dir->d_sb->s_export_op; 28 const struct export_operations *nop = dir->d_sb->s_export_op;
29 struct path path = {.mnt = mnt, .dentry = dir};
30 30
31 if (nop->get_name) 31 if (nop->get_name)
32 return nop->get_name(dir, name, child); 32 return nop->get_name(dir, name, child);
33 else 33 else
34 return get_name(mnt, dir, name, child); 34 return get_name(&path, name, child);
35} 35}
36 36
37/* 37/*
@@ -44,13 +44,14 @@ find_acceptable_alias(struct dentry *result,
44{ 44{
45 struct dentry *dentry, *toput = NULL; 45 struct dentry *dentry, *toput = NULL;
46 struct inode *inode; 46 struct inode *inode;
47 struct hlist_node *p;
47 48
48 if (acceptable(context, result)) 49 if (acceptable(context, result))
49 return result; 50 return result;
50 51
51 inode = result->d_inode; 52 inode = result->d_inode;
52 spin_lock(&inode->i_lock); 53 spin_lock(&inode->i_lock);
53 list_for_each_entry(dentry, &inode->i_dentry, d_alias) { 54 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
54 dget(dentry); 55 dget(dentry);
55 spin_unlock(&inode->i_lock); 56 spin_unlock(&inode->i_lock);
56 if (toput) 57 if (toput)
@@ -248,11 +249,10 @@ static int filldir_one(void * __buf, const char * name, int len,
248 * calls readdir on the parent until it finds an entry with 249 * calls readdir on the parent until it finds an entry with
249 * the same inode number as the child, and returns that. 250 * the same inode number as the child, and returns that.
250 */ 251 */
251static int get_name(struct vfsmount *mnt, struct dentry *dentry, 252static int get_name(const struct path *path, char *name, struct dentry *child)
252 char *name, struct dentry *child)
253{ 253{
254 const struct cred *cred = current_cred(); 254 const struct cred *cred = current_cred();
255 struct inode *dir = dentry->d_inode; 255 struct inode *dir = path->dentry->d_inode;
256 int error; 256 int error;
257 struct file *file; 257 struct file *file;
258 struct getdents_callback buffer; 258 struct getdents_callback buffer;
@@ -266,7 +266,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
266 /* 266 /*
267 * Open the directory ... 267 * Open the directory ...
268 */ 268 */
269 file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred); 269 file = dentry_open(path, O_RDONLY, cred);
270 error = PTR_ERR(file); 270 error = PTR_ERR(file);
271 if (IS_ERR(file)) 271 if (IS_ERR(file))
272 goto out; 272 goto out;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index f663a67d7bf0..73b0d9519836 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -41,8 +41,8 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
41{ 41{
42 int err = ext2_add_link(dentry, inode); 42 int err = ext2_add_link(dentry, inode);
43 if (!err) { 43 if (!err) {
44 d_instantiate(dentry, inode);
45 unlock_new_inode(inode); 44 unlock_new_inode(inode);
45 d_instantiate(dentry, inode);
46 return 0; 46 return 0;
47 } 47 }
48 inode_dec_link_count(inode); 48 inode_dec_link_count(inode);
@@ -55,7 +55,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
55 * Methods themselves. 55 * Methods themselves.
56 */ 56 */
57 57
58static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 58static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
59{ 59{
60 struct inode * inode; 60 struct inode * inode;
61 ino_t ino; 61 ino_t ino;
@@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
94 * If the create succeeds, we fill in the inode information 94 * If the create succeeds, we fill in the inode information
95 * with d_instantiate(). 95 * with d_instantiate().
96 */ 96 */
97static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd) 97static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
98{ 98{
99 struct inode *inode; 99 struct inode *inode;
100 100
@@ -242,8 +242,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
242 if (err) 242 if (err)
243 goto out_fail; 243 goto out_fail;
244 244
245 d_instantiate(dentry, inode);
246 unlock_new_inode(inode); 245 unlock_new_inode(inode);
246 d_instantiate(dentry, inode);
247out: 247out:
248 return err; 248 return err;
249 249
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index b3621cb7ea31..9f311d27b16f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -771,13 +771,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
771 err = -ENOMEM; 771 err = -ENOMEM;
772 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 772 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
773 if (!sbi) 773 if (!sbi)
774 goto failed_unlock; 774 goto failed;
775 775
776 sbi->s_blockgroup_lock = 776 sbi->s_blockgroup_lock =
777 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); 777 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
778 if (!sbi->s_blockgroup_lock) { 778 if (!sbi->s_blockgroup_lock) {
779 kfree(sbi); 779 kfree(sbi);
780 goto failed_unlock; 780 goto failed;
781 } 781 }
782 sb->s_fs_info = sbi; 782 sb->s_fs_info = sbi;
783 sbi->s_sb_block = sb_block; 783 sbi->s_sb_block = sb_block;
@@ -1130,7 +1130,7 @@ failed_sbi:
1130 sb->s_fs_info = NULL; 1130 sb->s_fs_info = NULL;
1131 kfree(sbi->s_blockgroup_lock); 1131 kfree(sbi->s_blockgroup_lock);
1132 kfree(sbi); 1132 kfree(sbi);
1133failed_unlock: 1133failed:
1134 return ret; 1134 return ret;
1135} 1135}
1136 1136
@@ -1184,6 +1184,12 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
1184 struct ext2_sb_info *sbi = EXT2_SB(sb); 1184 struct ext2_sb_info *sbi = EXT2_SB(sb);
1185 struct ext2_super_block *es = EXT2_SB(sb)->s_es; 1185 struct ext2_super_block *es = EXT2_SB(sb)->s_es;
1186 1186
1187 /*
1188 * Write quota structures to quota file, sync_blockdev() will write
1189 * them to disk later
1190 */
1191 dquot_writeback_dquots(sb, -1);
1192
1187 spin_lock(&sbi->s_lock); 1193 spin_lock(&sbi->s_lock);
1188 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { 1194 if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
1189 ext2_debug("setting valid to 0\n"); 1195 ext2_debug("setting valid to 0\n");
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 92490e9f85ca..c8fff930790d 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -300,10 +300,11 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
300{ 300{
301 struct inode *inode = file->f_mapping->host; 301 struct inode *inode = file->f_mapping->host;
302 int dx_dir = is_dx_dir(inode); 302 int dx_dir = is_dx_dir(inode);
303 loff_t htree_max = ext3_get_htree_eof(file);
303 304
304 if (likely(dx_dir)) 305 if (likely(dx_dir))
305 return generic_file_llseek_size(file, offset, origin, 306 return generic_file_llseek_size(file, offset, origin,
306 ext3_get_htree_eof(file)); 307 htree_max, htree_max);
307 else 308 else
308 return generic_file_llseek(file, offset, origin); 309 return generic_file_llseek(file, offset, origin);
309} 310}
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index d4dff278cbd8..b31dbd4c46ad 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -92,8 +92,13 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
92 * disk caches manually so that data really is on persistent 92 * disk caches manually so that data really is on persistent
93 * storage 93 * storage
94 */ 94 */
95 if (needs_barrier) 95 if (needs_barrier) {
96 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 96 int err;
97
98 err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
99 if (!ret)
100 ret = err;
101 }
97out: 102out:
98 trace_ext3_sync_file_exit(inode, ret); 103 trace_ext3_sync_file_exit(inode, ret);
99 return ret; 104 return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index eeb63dfc5d20..8f4fddac01a6 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1011,7 +1011,7 @@ errout:
1011 return NULL; 1011 return NULL;
1012} 1012}
1013 1013
1014static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 1014static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
1015{ 1015{
1016 struct inode * inode; 1016 struct inode * inode;
1017 struct ext3_dir_entry_2 * de; 1017 struct ext3_dir_entry_2 * de;
@@ -1671,8 +1671,8 @@ static int ext3_add_nondir(handle_t *handle,
1671 int err = ext3_add_entry(handle, dentry, inode); 1671 int err = ext3_add_entry(handle, dentry, inode);
1672 if (!err) { 1672 if (!err) {
1673 ext3_mark_inode_dirty(handle, inode); 1673 ext3_mark_inode_dirty(handle, inode);
1674 d_instantiate(dentry, inode);
1675 unlock_new_inode(inode); 1674 unlock_new_inode(inode);
1675 d_instantiate(dentry, inode);
1676 return 0; 1676 return 0;
1677 } 1677 }
1678 drop_nlink(inode); 1678 drop_nlink(inode);
@@ -1690,7 +1690,7 @@ static int ext3_add_nondir(handle_t *handle,
1690 * with d_instantiate(). 1690 * with d_instantiate().
1691 */ 1691 */
1692static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode, 1692static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
1693 struct nameidata *nd) 1693 bool excl)
1694{ 1694{
1695 handle_t *handle; 1695 handle_t *handle;
1696 struct inode * inode; 1696 struct inode * inode;
@@ -1836,8 +1836,8 @@ out_clear_inode:
1836 if (err) 1836 if (err)
1837 goto out_clear_inode; 1837 goto out_clear_inode;
1838 1838
1839 d_instantiate(dentry, inode);
1840 unlock_new_inode(inode); 1839 unlock_new_inode(inode);
1840 d_instantiate(dentry, inode);
1841out_stop: 1841out_stop:
1842 brelse(dir_block); 1842 brelse(dir_block);
1843 ext3_journal_stop(handle); 1843 ext3_journal_stop(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 8c3a44b7c375..ff9bcdc5b0d5 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2058,7 +2058,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
2058 goto failed_mount3; 2058 goto failed_mount3;
2059 } 2059 }
2060 2060
2061 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2061 if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY))
2062 sb->s_flags |= MS_RDONLY;
2062 2063
2063 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; 2064 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2064 ext3_orphan_cleanup(sb, es); 2065 ext3_orphan_cleanup(sb, es);
@@ -2526,6 +2527,11 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
2526 tid_t target; 2527 tid_t target;
2527 2528
2528 trace_ext3_sync_fs(sb, wait); 2529 trace_ext3_sync_fs(sb, wait);
2530 /*
2531 * Writeback quota in non-journalled quota case - journalled quota has
2532 * no dirty dquots
2533 */
2534 dquot_writeback_dquots(sb, -1);
2529 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { 2535 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2530 if (wait) 2536 if (wait)
2531 log_wait_commit(EXT3_SB(sb)->s_journal, target); 2537 log_wait_commit(EXT3_SB(sb)->s_journal, target);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index aa39e600d159..8e07d2a5a139 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -324,74 +324,27 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
324 324
325 325
326/* 326/*
327 * ext4_dir_llseek() based on generic_file_llseek() to handle both 327 * ext4_dir_llseek() calls generic_file_llseek_size to handle htree
328 * non-htree and htree directories, where the "offset" is in terms 328 * directories, where the "offset" is in terms of the filename hash
329 * of the filename hash value instead of the byte offset. 329 * value instead of the byte offset.
330 * 330 *
331 * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX) 331 * Because we may return a 64-bit hash that is well beyond offset limits,
332 * will be invalid once the directory was converted into a dx directory 332 * we need to pass the max hash as the maximum allowable offset in
333 * the htree directory case.
334 *
335 * For non-htree, ext4_llseek already chooses the proper max offset.
333 */ 336 */
334loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin) 337loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin)
335{ 338{
336 struct inode *inode = file->f_mapping->host; 339 struct inode *inode = file->f_mapping->host;
337 loff_t ret = -EINVAL;
338 int dx_dir = is_dx_dir(inode); 340 int dx_dir = is_dx_dir(inode);
341 loff_t htree_max = ext4_get_htree_eof(file);
339 342
340 mutex_lock(&inode->i_mutex); 343 if (likely(dx_dir))
341 344 return generic_file_llseek_size(file, offset, origin,
342 /* NOTE: relative offsets with dx directories might not work 345 htree_max, htree_max);
343 * as expected, as it is difficult to figure out the 346 else
344 * correct offset between dx hashes */ 347 return ext4_llseek(file, offset, origin);
345
346 switch (origin) {
347 case SEEK_END:
348 if (unlikely(offset > 0))
349 goto out_err; /* not supported for directories */
350
351 /* so only negative offsets are left, does that have a
352 * meaning for directories at all? */
353 if (dx_dir)
354 offset += ext4_get_htree_eof(file);
355 else
356 offset += inode->i_size;
357 break;
358 case SEEK_CUR:
359 /*
360 * Here we special-case the lseek(fd, 0, SEEK_CUR)
361 * position-querying operation. Avoid rewriting the "same"
362 * f_pos value back to the file because a concurrent read(),
363 * write() or lseek() might have altered it
364 */
365 if (offset == 0) {
366 offset = file->f_pos;
367 goto out_ok;
368 }
369
370 offset += file->f_pos;
371 break;
372 }
373
374 if (unlikely(offset < 0))
375 goto out_err;
376
377 if (!dx_dir) {
378 if (offset > inode->i_sb->s_maxbytes)
379 goto out_err;
380 } else if (offset > ext4_get_htree_eof(file))
381 goto out_err;
382
383 /* Special lock needed here? */
384 if (offset != file->f_pos) {
385 file->f_pos = offset;
386 file->f_version = 0;
387 }
388
389out_ok:
390 ret = offset;
391out_err:
392 mutex_unlock(&inode->i_mutex);
393
394 return ret;
395} 348}
396 349
397/* 350/*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8c7642a00054..782eecb57e43 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -211,9 +211,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
211} 211}
212 212
213/* 213/*
214 * ext4_llseek() copied from generic_file_llseek() to handle both 214 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
215 * block-mapped and extent-mapped maxbytes values. This should 215 * by calling generic_file_llseek_size() with the appropriate maxbytes
216 * otherwise be identical with generic_file_llseek(). 216 * value for each.
217 */ 217 */
218loff_t ext4_llseek(struct file *file, loff_t offset, int origin) 218loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
219{ 219{
@@ -225,7 +225,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
225 else 225 else
226 maxbytes = inode->i_sb->s_maxbytes; 226 maxbytes = inode->i_sb->s_maxbytes;
227 227
228 return generic_file_llseek_size(file, offset, origin, maxbytes); 228 return generic_file_llseek_size(file, offset, origin,
229 maxbytes, i_size_read(inode));
229} 230}
230 231
231const struct file_operations ext4_file_operations = { 232const struct file_operations ext4_file_operations = {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index bb6c7d811313..2a1dcea4f12e 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -135,14 +135,7 @@ static int ext4_sync_parent(struct inode *inode)
135 inode = igrab(inode); 135 inode = igrab(inode);
136 while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { 136 while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
137 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); 137 ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
138 dentry = NULL; 138 dentry = d_find_any_alias(inode);
139 spin_lock(&inode->i_lock);
140 if (!list_empty(&inode->i_dentry)) {
141 dentry = list_first_entry(&inode->i_dentry,
142 struct dentry, d_alias);
143 dget(dentry);
144 }
145 spin_unlock(&inode->i_lock);
146 if (!dentry) 139 if (!dentry)
147 break; 140 break;
148 next = igrab(dentry->d_parent->d_inode); 141 next = igrab(dentry->d_parent->d_inode);
@@ -232,7 +225,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
232 225
233 if (!journal) { 226 if (!journal) {
234 ret = __sync_inode(inode, datasync); 227 ret = __sync_inode(inode, datasync);
235 if (!ret && !list_empty(&inode->i_dentry)) 228 if (!ret && !hlist_empty(&inode->i_dentry))
236 ret = ext4_sync_parent(inode); 229 ret = ext4_sync_parent(inode);
237 goto out; 230 goto out;
238 } 231 }
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e34deac3f366..7f7dad787603 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -268,7 +268,6 @@ group_extend_out:
268 err = ext4_move_extents(filp, donor_filp, me.orig_start, 268 err = ext4_move_extents(filp, donor_filp, me.orig_start,
269 me.donor_start, me.len, &me.moved_len); 269 me.donor_start, me.len, &me.moved_len);
270 mnt_drop_write_file(filp); 270 mnt_drop_write_file(filp);
271 mnt_drop_write(filp->f_path.mnt);
272 271
273 if (copy_to_user((struct move_extent __user *)arg, 272 if (copy_to_user((struct move_extent __user *)arg,
274 &me, sizeof(me))) 273 &me, sizeof(me)))
@@ -390,7 +389,7 @@ group_add_out:
390 if (err) 389 if (err)
391 return err; 390 return err;
392 391
393 err = mnt_want_write(filp->f_path.mnt); 392 err = mnt_want_write_file(filp);
394 if (err) 393 if (err)
395 goto resizefs_out; 394 goto resizefs_out;
396 395
@@ -402,7 +401,7 @@ group_add_out:
402 } 401 }
403 if (err == 0) 402 if (err == 0)
404 err = err2; 403 err = err2;
405 mnt_drop_write(filp->f_path.mnt); 404 mnt_drop_write_file(filp);
406resizefs_out: 405resizefs_out:
407 ext4_resize_end(sb); 406 ext4_resize_end(sb);
408 return err; 407 return err;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5845cd97bf8b..d0d3f0e87f99 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1312,7 +1312,7 @@ errout:
1312 return NULL; 1312 return NULL;
1313} 1313}
1314 1314
1315static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1315static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1316{ 1316{
1317 struct inode *inode; 1317 struct inode *inode;
1318 struct ext4_dir_entry_2 *de; 1318 struct ext4_dir_entry_2 *de;
@@ -2072,8 +2072,8 @@ static int ext4_add_nondir(handle_t *handle,
2072 int err = ext4_add_entry(handle, dentry, inode); 2072 int err = ext4_add_entry(handle, dentry, inode);
2073 if (!err) { 2073 if (!err) {
2074 ext4_mark_inode_dirty(handle, inode); 2074 ext4_mark_inode_dirty(handle, inode);
2075 d_instantiate(dentry, inode);
2076 unlock_new_inode(inode); 2075 unlock_new_inode(inode);
2076 d_instantiate(dentry, inode);
2077 return 0; 2077 return 0;
2078 } 2078 }
2079 drop_nlink(inode); 2079 drop_nlink(inode);
@@ -2091,7 +2091,7 @@ static int ext4_add_nondir(handle_t *handle,
2091 * with d_instantiate(). 2091 * with d_instantiate().
2092 */ 2092 */
2093static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, 2093static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2094 struct nameidata *nd) 2094 bool excl)
2095{ 2095{
2096 handle_t *handle; 2096 handle_t *handle;
2097 struct inode *inode; 2097 struct inode *inode;
@@ -2249,8 +2249,8 @@ out_clear_inode:
2249 err = ext4_mark_inode_dirty(handle, dir); 2249 err = ext4_mark_inode_dirty(handle, dir);
2250 if (err) 2250 if (err)
2251 goto out_clear_inode; 2251 goto out_clear_inode;
2252 d_instantiate(dentry, inode);
2253 unlock_new_inode(inode); 2252 unlock_new_inode(inode);
2253 d_instantiate(dentry, inode);
2254out_stop: 2254out_stop:
2255 brelse(dir_block); 2255 brelse(dir_block);
2256 ext4_journal_stop(handle); 2256 ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index eb7aa3e4ef05..d8759401ecae 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4325,6 +4325,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4325 4325
4326 trace_ext4_sync_fs(sb, wait); 4326 trace_ext4_sync_fs(sb, wait);
4327 flush_workqueue(sbi->dio_unwritten_wq); 4327 flush_workqueue(sbi->dio_unwritten_wq);
4328 /*
4329 * Writeback quota in non-journalled quota case - journalled quota has
4330 * no dirty dquots
4331 */
4332 dquot_writeback_dquots(sb, -1);
4328 if (jbd2_journal_start_commit(sbi->s_journal, &target)) { 4333 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4329 if (wait) 4334 if (wait)
4330 jbd2_log_wait_commit(sbi->s_journal, target); 4335 jbd2_log_wait_commit(sbi->s_journal, target);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index c5938c9084b9..70d993a93805 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -201,7 +201,7 @@ static const struct dentry_operations msdos_dentry_operations = {
201 201
202/***** Get inode using directory and name */ 202/***** Get inode using directory and name */
203static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, 203static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
204 struct nameidata *nd) 204 unsigned int flags)
205{ 205{
206 struct super_block *sb = dir->i_sb; 206 struct super_block *sb = dir->i_sb;
207 struct fat_slot_info sinfo; 207 struct fat_slot_info sinfo;
@@ -265,7 +265,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
265 265
266/***** Create a file */ 266/***** Create a file */
267static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode, 267static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
268 struct nameidata *nd) 268 bool excl)
269{ 269{
270 struct super_block *sb = dir->i_sb; 270 struct super_block *sb = dir->i_sb;
271 struct inode *inode = NULL; 271 struct inode *inode = NULL;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 98ae804f5273..6cc480652433 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -41,9 +41,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
41 return ret; 41 return ret;
42} 42}
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, unsigned int flags)
45{ 45{
46 if (nd && nd->flags & LOOKUP_RCU) 46 if (flags & LOOKUP_RCU)
47 return -ECHILD; 47 return -ECHILD;
48 48
49 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
@@ -52,9 +52,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
52 return vfat_revalidate_shortname(dentry); 52 return vfat_revalidate_shortname(dentry);
53} 53}
54 54
55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
56{ 56{
57 if (nd && nd->flags & LOOKUP_RCU) 57 if (flags & LOOKUP_RCU)
58 return -ECHILD; 58 return -ECHILD;
59 59
60 /* 60 /*
@@ -74,7 +74,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
74 * This may be nfsd (or something), anyway, we can't see the 74 * This may be nfsd (or something), anyway, we can't see the
75 * intent of this. So, since this can be for creation, drop it. 75 * intent of this. So, since this can be for creation, drop it.
76 */ 76 */
77 if (!nd) 77 if (!flags)
78 return 0; 78 return 0;
79 79
80 /* 80 /*
@@ -82,7 +82,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
82 * case sensitive name which is specified by user if this is 82 * case sensitive name which is specified by user if this is
83 * for creation. 83 * for creation.
84 */ 84 */
85 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 85 if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
86 return 0; 86 return 0;
87 87
88 return vfat_revalidate_shortname(dentry); 88 return vfat_revalidate_shortname(dentry);
@@ -714,7 +714,7 @@ static int vfat_d_anon_disconn(struct dentry *dentry)
714} 714}
715 715
716static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, 716static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry,
717 struct nameidata *nd) 717 unsigned int flags)
718{ 718{
719 struct super_block *sb = dir->i_sb; 719 struct super_block *sb = dir->i_sb;
720 struct fat_slot_info sinfo; 720 struct fat_slot_info sinfo;
@@ -772,7 +772,7 @@ error:
772} 772}
773 773
774static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, 774static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
775 struct nameidata *nd) 775 bool excl)
776{ 776{
777 struct super_block *sb = dir->i_sb; 777 struct super_block *sb = dir->i_sb;
778 struct inode *inode; 778 struct inode *inode;
diff --git a/fs/fifo.c b/fs/fifo.c
index b1a524d798e7..cf6f4345ceb0 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -14,7 +14,7 @@
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/pipe_fs_i.h> 15#include <linux/pipe_fs_i.h>
16 16
17static void wait_for_partner(struct inode* inode, unsigned int *cnt) 17static int wait_for_partner(struct inode* inode, unsigned int *cnt)
18{ 18{
19 int cur = *cnt; 19 int cur = *cnt;
20 20
@@ -23,6 +23,7 @@ static void wait_for_partner(struct inode* inode, unsigned int *cnt)
23 if (signal_pending(current)) 23 if (signal_pending(current))
24 break; 24 break;
25 } 25 }
26 return cur == *cnt ? -ERESTARTSYS : 0;
26} 27}
27 28
28static void wake_up_partner(struct inode* inode) 29static void wake_up_partner(struct inode* inode)
@@ -67,8 +68,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
67 * seen a writer */ 68 * seen a writer */
68 filp->f_version = pipe->w_counter; 69 filp->f_version = pipe->w_counter;
69 } else { 70 } else {
70 wait_for_partner(inode, &pipe->w_counter); 71 if (wait_for_partner(inode, &pipe->w_counter))
71 if(signal_pending(current))
72 goto err_rd; 72 goto err_rd;
73 } 73 }
74 } 74 }
@@ -90,8 +90,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
90 wake_up_partner(inode); 90 wake_up_partner(inode);
91 91
92 if (!pipe->readers) { 92 if (!pipe->readers) {
93 wait_for_partner(inode, &pipe->r_counter); 93 if (wait_for_partner(inode, &pipe->r_counter))
94 if (signal_pending(current))
95 goto err_wr; 94 goto err_wr;
96 } 95 }
97 break; 96 break;
diff --git a/fs/file_table.c b/fs/file_table.c
index a305d9e2d1b2..b3fc4d67a26b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -23,6 +23,8 @@
23#include <linux/lglock.h> 23#include <linux/lglock.h>
24#include <linux/percpu_counter.h> 24#include <linux/percpu_counter.h>
25#include <linux/percpu.h> 25#include <linux/percpu.h>
26#include <linux/hardirq.h>
27#include <linux/task_work.h>
26#include <linux/ima.h> 28#include <linux/ima.h>
27 29
28#include <linux/atomic.h> 30#include <linux/atomic.h>
@@ -251,7 +253,6 @@ static void __fput(struct file *file)
251 } 253 }
252 fops_put(file->f_op); 254 fops_put(file->f_op);
253 put_pid(file->f_owner.pid); 255 put_pid(file->f_owner.pid);
254 file_sb_list_del(file);
255 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) 256 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
256 i_readcount_dec(inode); 257 i_readcount_dec(inode);
257 if (file->f_mode & FMODE_WRITE) 258 if (file->f_mode & FMODE_WRITE)
@@ -263,10 +264,77 @@ static void __fput(struct file *file)
263 mntput(mnt); 264 mntput(mnt);
264} 265}
265 266
267static DEFINE_SPINLOCK(delayed_fput_lock);
268static LIST_HEAD(delayed_fput_list);
269static void delayed_fput(struct work_struct *unused)
270{
271 LIST_HEAD(head);
272 spin_lock_irq(&delayed_fput_lock);
273 list_splice_init(&delayed_fput_list, &head);
274 spin_unlock_irq(&delayed_fput_lock);
275 while (!list_empty(&head)) {
276 struct file *f = list_first_entry(&head, struct file, f_u.fu_list);
277 list_del_init(&f->f_u.fu_list);
278 __fput(f);
279 }
280}
281
282static void ____fput(struct callback_head *work)
283{
284 __fput(container_of(work, struct file, f_u.fu_rcuhead));
285}
286
287/*
288 * If kernel thread really needs to have the final fput() it has done
289 * to complete, call this. The only user right now is the boot - we
290 * *do* need to make sure our writes to binaries on initramfs has
291 * not left us with opened struct file waiting for __fput() - execve()
292 * won't work without that. Please, don't add more callers without
293 * very good reasons; in particular, never call that with locks
294 * held and never call that from a thread that might need to do
295 * some work on any kind of umount.
296 */
297void flush_delayed_fput(void)
298{
299 delayed_fput(NULL);
300}
301
302static DECLARE_WORK(delayed_fput_work, delayed_fput);
303
266void fput(struct file *file) 304void fput(struct file *file)
267{ 305{
268 if (atomic_long_dec_and_test(&file->f_count)) 306 if (atomic_long_dec_and_test(&file->f_count)) {
307 struct task_struct *task = current;
308 file_sb_list_del(file);
309 if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) {
310 unsigned long flags;
311 spin_lock_irqsave(&delayed_fput_lock, flags);
312 list_add(&file->f_u.fu_list, &delayed_fput_list);
313 schedule_work(&delayed_fput_work);
314 spin_unlock_irqrestore(&delayed_fput_lock, flags);
315 return;
316 }
317 init_task_work(&file->f_u.fu_rcuhead, ____fput);
318 task_work_add(task, &file->f_u.fu_rcuhead, true);
319 }
320}
321
322/*
323 * synchronous analog of fput(); for kernel threads that might be needed
324 * in some umount() (and thus can't use flush_delayed_fput() without
325 * risking deadlocks), need to wait for completion of __fput() and know
326 * for this specific struct file it won't involve anything that would
327 * need them. Use only if you really need it - at the very least,
328 * don't blindly convert fput() by kernel thread to that.
329 */
330void __fput_sync(struct file *file)
331{
332 if (atomic_long_dec_and_test(&file->f_count)) {
333 struct task_struct *task = current;
334 file_sb_list_del(file);
335 BUG_ON(!(task->flags & PF_KTHREAD));
269 __fput(file); 336 __fput(file);
337 }
270} 338}
271 339
272EXPORT_SYMBOL(fput); 340EXPORT_SYMBOL(fput);
@@ -483,10 +551,8 @@ void mark_files_ro(struct super_block *sb)
483{ 551{
484 struct file *f; 552 struct file *f;
485 553
486retry:
487 lg_global_lock(&files_lglock); 554 lg_global_lock(&files_lglock);
488 do_file_list_for_each_entry(sb, f) { 555 do_file_list_for_each_entry(sb, f) {
489 struct vfsmount *mnt;
490 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) 556 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
491 continue; 557 continue;
492 if (!file_count(f)) 558 if (!file_count(f))
@@ -499,12 +565,7 @@ retry:
499 if (file_check_writeable(f) != 0) 565 if (file_check_writeable(f) != 0)
500 continue; 566 continue;
501 file_release_write(f); 567 file_release_write(f);
502 mnt = mntget(f->f_path.mnt); 568 mnt_drop_write_file(f);
503 /* This can sleep, so we can't hold the spinlock. */
504 lg_global_unlock(&files_lglock);
505 mnt_drop_write(mnt);
506 mntput(mnt);
507 goto retry;
508 } while_file_list_for_each_entry; 569 } while_file_list_for_each_entry;
509 lg_global_unlock(&files_lglock); 570 lg_global_unlock(&files_lglock);
510} 571}
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 3360f1e678ad..bd447e88f208 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -48,7 +48,7 @@
48#define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize)) 48#define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize))
49 49
50 50
51static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *); 51static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int);
52static int vxfs_readdir(struct file *, void *, filldir_t); 52static int vxfs_readdir(struct file *, void *, filldir_t);
53 53
54const struct inode_operations vxfs_dir_inode_ops = { 54const struct inode_operations vxfs_dir_inode_ops = {
@@ -203,7 +203,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp)
203 * in the return pointer. 203 * in the return pointer.
204 */ 204 */
205static struct dentry * 205static struct dentry *
206vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) 206vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags)
207{ 207{
208 struct inode *ip = NULL; 208 struct inode *ip = NULL;
209 ino_t ino; 209 ino_t ino;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 41a3ccff18d8..8f660dd6137a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1315,6 +1315,8 @@ void writeback_inodes_sb_nr(struct super_block *sb,
1315 .reason = reason, 1315 .reason = reason,
1316 }; 1316 };
1317 1317
1318 if (sb->s_bdi == &noop_backing_dev_info)
1319 return;
1318 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1320 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1319 bdi_queue_work(sb->s_bdi, &work); 1321 bdi_queue_work(sb->s_bdi, &work);
1320 wait_for_completion(&done); 1322 wait_for_completion(&done);
@@ -1398,6 +1400,9 @@ void sync_inodes_sb(struct super_block *sb)
1398 .reason = WB_REASON_SYNC, 1400 .reason = WB_REASON_SYNC,
1399 }; 1401 };
1400 1402
1403 /* Nothing to do? */
1404 if (sb->s_bdi == &noop_backing_dev_info)
1405 return;
1401 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1406 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1402 1407
1403 bdi_queue_work(sb->s_bdi, &work); 1408 bdi_queue_work(sb->s_bdi, &work);
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index e159e682ad4c..5df4775fea03 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -6,18 +6,6 @@
6#include <linux/fs_struct.h> 6#include <linux/fs_struct.h>
7#include "internal.h" 7#include "internal.h"
8 8
9static inline void path_get_longterm(struct path *path)
10{
11 path_get(path);
12 mnt_make_longterm(path->mnt);
13}
14
15static inline void path_put_longterm(struct path *path)
16{
17 mnt_make_shortterm(path->mnt);
18 path_put(path);
19}
20
21/* 9/*
22 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. 10 * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
23 * It can block. 11 * It can block.
@@ -26,7 +14,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
26{ 14{
27 struct path old_root; 15 struct path old_root;
28 16
29 path_get_longterm(path); 17 path_get(path);
30 spin_lock(&fs->lock); 18 spin_lock(&fs->lock);
31 write_seqcount_begin(&fs->seq); 19 write_seqcount_begin(&fs->seq);
32 old_root = fs->root; 20 old_root = fs->root;
@@ -34,7 +22,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
34 write_seqcount_end(&fs->seq); 22 write_seqcount_end(&fs->seq);
35 spin_unlock(&fs->lock); 23 spin_unlock(&fs->lock);
36 if (old_root.dentry) 24 if (old_root.dentry)
37 path_put_longterm(&old_root); 25 path_put(&old_root);
38} 26}
39 27
40/* 28/*
@@ -45,7 +33,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
45{ 33{
46 struct path old_pwd; 34 struct path old_pwd;
47 35
48 path_get_longterm(path); 36 path_get(path);
49 spin_lock(&fs->lock); 37 spin_lock(&fs->lock);
50 write_seqcount_begin(&fs->seq); 38 write_seqcount_begin(&fs->seq);
51 old_pwd = fs->pwd; 39 old_pwd = fs->pwd;
@@ -54,7 +42,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
54 spin_unlock(&fs->lock); 42 spin_unlock(&fs->lock);
55 43
56 if (old_pwd.dentry) 44 if (old_pwd.dentry)
57 path_put_longterm(&old_pwd); 45 path_put(&old_pwd);
58} 46}
59 47
60static inline int replace_path(struct path *p, const struct path *old, const struct path *new) 48static inline int replace_path(struct path *p, const struct path *old, const struct path *new)
@@ -84,7 +72,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
84 write_seqcount_end(&fs->seq); 72 write_seqcount_end(&fs->seq);
85 while (hits--) { 73 while (hits--) {
86 count++; 74 count++;
87 path_get_longterm(new_root); 75 path_get(new_root);
88 } 76 }
89 spin_unlock(&fs->lock); 77 spin_unlock(&fs->lock);
90 } 78 }
@@ -92,13 +80,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
92 } while_each_thread(g, p); 80 } while_each_thread(g, p);
93 read_unlock(&tasklist_lock); 81 read_unlock(&tasklist_lock);
94 while (count--) 82 while (count--)
95 path_put_longterm(old_root); 83 path_put(old_root);
96} 84}
97 85
98void free_fs_struct(struct fs_struct *fs) 86void free_fs_struct(struct fs_struct *fs)
99{ 87{
100 path_put_longterm(&fs->root); 88 path_put(&fs->root);
101 path_put_longterm(&fs->pwd); 89 path_put(&fs->pwd);
102 kmem_cache_free(fs_cachep, fs); 90 kmem_cache_free(fs_cachep, fs);
103} 91}
104 92
@@ -132,9 +120,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
132 120
133 spin_lock(&old->lock); 121 spin_lock(&old->lock);
134 fs->root = old->root; 122 fs->root = old->root;
135 path_get_longterm(&fs->root); 123 path_get(&fs->root);
136 fs->pwd = old->pwd; 124 fs->pwd = old->pwd;
137 path_get_longterm(&fs->pwd); 125 path_get(&fs->pwd);
138 spin_unlock(&old->lock); 126 spin_unlock(&old->lock);
139 } 127 }
140 return fs; 128 return fs;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 334e0b18a014..8964cf3999b2 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -154,7 +154,7 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
154 * the lookup once more. If the lookup results in the same inode, 154 * the lookup once more. If the lookup results in the same inode,
155 * then refresh the attributes, timeouts and mark the dentry valid. 155 * then refresh the attributes, timeouts and mark the dentry valid.
156 */ 156 */
157static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) 157static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
@@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
174 if (!inode) 174 if (!inode)
175 return 0; 175 return 0;
176 176
177 if (nd && (nd->flags & LOOKUP_RCU)) 177 if (flags & LOOKUP_RCU)
178 return -ECHILD; 178 return -ECHILD;
179 179
180 fc = get_fuse_conn(inode); 180 fc = get_fuse_conn(inode);
@@ -249,7 +249,7 @@ static struct dentry *fuse_d_add_directory(struct dentry *entry,
249 /* This tries to shrink the subtree below alias */ 249 /* This tries to shrink the subtree below alias */
250 fuse_invalidate_entry(alias); 250 fuse_invalidate_entry(alias);
251 dput(alias); 251 dput(alias);
252 if (!list_empty(&inode->i_dentry)) 252 if (!hlist_empty(&inode->i_dentry))
253 return ERR_PTR(-EBUSY); 253 return ERR_PTR(-EBUSY);
254 } else { 254 } else {
255 dput(alias); 255 dput(alias);
@@ -316,7 +316,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
316} 316}
317 317
318static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 318static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
319 struct nameidata *nd) 319 unsigned int flags)
320{ 320{
321 int err; 321 int err;
322 struct fuse_entry_out outarg; 322 struct fuse_entry_out outarg;
@@ -370,7 +370,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
370 * 'mknod' + 'open' requests. 370 * 'mknod' + 'open' requests.
371 */ 371 */
372static int fuse_create_open(struct inode *dir, struct dentry *entry, 372static int fuse_create_open(struct inode *dir, struct dentry *entry,
373 umode_t mode, struct nameidata *nd) 373 struct file *file, unsigned flags,
374 umode_t mode, int *opened)
374{ 375{
375 int err; 376 int err;
376 struct inode *inode; 377 struct inode *inode;
@@ -381,15 +382,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
381 struct fuse_open_out outopen; 382 struct fuse_open_out outopen;
382 struct fuse_entry_out outentry; 383 struct fuse_entry_out outentry;
383 struct fuse_file *ff; 384 struct fuse_file *ff;
384 struct file *file;
385 int flags = nd->intent.open.flags;
386
387 if (fc->no_create)
388 return -ENOSYS;
389 385
390 forget = fuse_alloc_forget(); 386 forget = fuse_alloc_forget();
387 err = -ENOMEM;
391 if (!forget) 388 if (!forget)
392 return -ENOMEM; 389 goto out_err;
393 390
394 req = fuse_get_req(fc); 391 req = fuse_get_req(fc);
395 err = PTR_ERR(req); 392 err = PTR_ERR(req);
@@ -428,11 +425,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
428 req->out.args[1].value = &outopen; 425 req->out.args[1].value = &outopen;
429 fuse_request_send(fc, req); 426 fuse_request_send(fc, req);
430 err = req->out.h.error; 427 err = req->out.h.error;
431 if (err) { 428 if (err)
432 if (err == -ENOSYS)
433 fc->no_create = 1;
434 goto out_free_ff; 429 goto out_free_ff;
435 }
436 430
437 err = -EIO; 431 err = -EIO;
438 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) 432 if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
@@ -448,28 +442,74 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
448 flags &= ~(O_CREAT | O_EXCL | O_TRUNC); 442 flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
449 fuse_sync_release(ff, flags); 443 fuse_sync_release(ff, flags);
450 fuse_queue_forget(fc, forget, outentry.nodeid, 1); 444 fuse_queue_forget(fc, forget, outentry.nodeid, 1);
451 return -ENOMEM; 445 err = -ENOMEM;
446 goto out_err;
452 } 447 }
453 kfree(forget); 448 kfree(forget);
454 d_instantiate(entry, inode); 449 d_instantiate(entry, inode);
455 fuse_change_entry_timeout(entry, &outentry); 450 fuse_change_entry_timeout(entry, &outentry);
456 fuse_invalidate_attr(dir); 451 fuse_invalidate_attr(dir);
457 file = lookup_instantiate_filp(nd, entry, generic_file_open); 452 err = finish_open(file, entry, generic_file_open, opened);
458 if (IS_ERR(file)) { 453 if (err) {
459 fuse_sync_release(ff, flags); 454 fuse_sync_release(ff, flags);
460 return PTR_ERR(file); 455 } else {
456 file->private_data = fuse_file_get(ff);
457 fuse_finish_open(inode, file);
461 } 458 }
462 file->private_data = fuse_file_get(ff); 459 return err;
463 fuse_finish_open(inode, file);
464 return 0;
465 460
466 out_free_ff: 461out_free_ff:
467 fuse_file_free(ff); 462 fuse_file_free(ff);
468 out_put_request: 463out_put_request:
469 fuse_put_request(fc, req); 464 fuse_put_request(fc, req);
470 out_put_forget_req: 465out_put_forget_req:
471 kfree(forget); 466 kfree(forget);
467out_err:
468 return err;
469}
470
471static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
472static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
473 struct file *file, unsigned flags,
474 umode_t mode, int *opened)
475{
476 int err;
477 struct fuse_conn *fc = get_fuse_conn(dir);
478 struct dentry *res = NULL;
479
480 if (d_unhashed(entry)) {
481 res = fuse_lookup(dir, entry, 0);
482 if (IS_ERR(res))
483 return PTR_ERR(res);
484
485 if (res)
486 entry = res;
487 }
488
489 if (!(flags & O_CREAT) || entry->d_inode)
490 goto no_open;
491
492 /* Only creates */
493 *opened |= FILE_CREATED;
494
495 if (fc->no_create)
496 goto mknod;
497
498 err = fuse_create_open(dir, entry, file, flags, mode, opened);
499 if (err == -ENOSYS) {
500 fc->no_create = 1;
501 goto mknod;
502 }
503out_dput:
504 dput(res);
472 return err; 505 return err;
506
507mknod:
508 err = fuse_mknod(dir, entry, mode, 0);
509 if (err)
510 goto out_dput;
511no_open:
512 return finish_no_open(file, res);
473} 513}
474 514
475/* 515/*
@@ -571,14 +611,8 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
571} 611}
572 612
573static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, 613static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
574 struct nameidata *nd) 614 bool excl)
575{ 615{
576 if (nd) {
577 int err = fuse_create_open(dir, entry, mode, nd);
578 if (err != -ENOSYS)
579 return err;
580 /* Fall back on mknod */
581 }
582 return fuse_mknod(dir, entry, mode, 0); 616 return fuse_mknod(dir, entry, mode, 0);
583} 617}
584 618
@@ -1646,6 +1680,7 @@ static const struct inode_operations fuse_dir_inode_operations = {
1646 .link = fuse_link, 1680 .link = fuse_link,
1647 .setattr = fuse_setattr, 1681 .setattr = fuse_setattr,
1648 .create = fuse_create, 1682 .create = fuse_create,
1683 .atomic_open = fuse_atomic_open,
1649 .mknod = fuse_mknod, 1684 .mknod = fuse_mknod,
1650 .permission = fuse_permission, 1685 .permission = fuse_permission,
1651 .getattr = fuse_getattr, 1686 .getattr = fuse_getattr,
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index e80a464850c8..d6526347d386 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -614,7 +614,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 614 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
615 int alloc_required; 615 int alloc_required;
616 int error = 0; 616 int error = 0;
617 struct gfs2_qadata *qa = NULL;
618 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 617 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
619 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 618 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
620 struct page *page; 619 struct page *page;
@@ -638,15 +637,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
638 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); 637 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
639 638
640 if (alloc_required) { 639 if (alloc_required) {
641 qa = gfs2_qadata_get(ip);
642 if (!qa) {
643 error = -ENOMEM;
644 goto out_unlock;
645 }
646
647 error = gfs2_quota_lock_check(ip); 640 error = gfs2_quota_lock_check(ip);
648 if (error) 641 if (error)
649 goto out_alloc_put; 642 goto out_unlock;
650 643
651 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 644 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
652 if (error) 645 if (error)
@@ -708,8 +701,6 @@ out_trans_fail:
708 gfs2_inplace_release(ip); 701 gfs2_inplace_release(ip);
709out_qunlock: 702out_qunlock:
710 gfs2_quota_unlock(ip); 703 gfs2_quota_unlock(ip);
711out_alloc_put:
712 gfs2_qadata_put(ip);
713 } 704 }
714out_unlock: 705out_unlock:
715 if (&ip->i_inode == sdp->sd_rindex) { 706 if (&ip->i_inode == sdp->sd_rindex) {
@@ -846,7 +837,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
846 struct gfs2_sbd *sdp = GFS2_SB(inode); 837 struct gfs2_sbd *sdp = GFS2_SB(inode);
847 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 838 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
848 struct buffer_head *dibh; 839 struct buffer_head *dibh;
849 struct gfs2_qadata *qa = ip->i_qadata;
850 unsigned int from = pos & (PAGE_CACHE_SIZE - 1); 840 unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
851 unsigned int to = from + len; 841 unsigned int to = from + len;
852 int ret; 842 int ret;
@@ -878,12 +868,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
878 brelse(dibh); 868 brelse(dibh);
879failed: 869failed:
880 gfs2_trans_end(sdp); 870 gfs2_trans_end(sdp);
881 if (ip->i_res) 871 if (gfs2_mb_reserved(ip))
882 gfs2_inplace_release(ip); 872 gfs2_inplace_release(ip);
883 if (qa) { 873 if (ip->i_res->rs_qa_qd_num)
884 gfs2_quota_unlock(ip); 874 gfs2_quota_unlock(ip);
885 gfs2_qadata_put(ip);
886 }
887 if (inode == sdp->sd_rindex) { 875 if (inode == sdp->sd_rindex) {
888 gfs2_glock_dq(&m_ip->i_gh); 876 gfs2_glock_dq(&m_ip->i_gh);
889 gfs2_holder_uninit(&m_ip->i_gh); 877 gfs2_holder_uninit(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index dab54099dd98..49cd7dd4a9fa 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -785,6 +785,9 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
785 if (error) 785 if (error)
786 goto out_rlist; 786 goto out_rlist;
787 787
788 if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */
789 gfs2_rs_deltree(ip->i_res);
790
788 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + 791 error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
789 RES_INDIRECT + RES_STATFS + RES_QUOTA, 792 RES_INDIRECT + RES_STATFS + RES_QUOTA,
790 revokes); 793 revokes);
@@ -1045,12 +1048,13 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1045 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; 1048 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
1046 1049
1047 find_metapath(sdp, lblock, &mp, ip->i_height); 1050 find_metapath(sdp, lblock, &mp, ip->i_height);
1048 if (!gfs2_qadata_get(ip)) 1051 error = gfs2_rindex_update(sdp);
1049 return -ENOMEM; 1052 if (error)
1053 return error;
1050 1054
1051 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1055 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1052 if (error) 1056 if (error)
1053 goto out; 1057 return error;
1054 1058
1055 while (height--) { 1059 while (height--) {
1056 struct strip_mine sm; 1060 struct strip_mine sm;
@@ -1064,8 +1068,6 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1064 1068
1065 gfs2_quota_unhold(ip); 1069 gfs2_quota_unhold(ip);
1066 1070
1067out:
1068 gfs2_qadata_put(ip);
1069 return error; 1071 return error;
1070} 1072}
1071 1073
@@ -1167,19 +1169,14 @@ static int do_grow(struct inode *inode, u64 size)
1167 struct gfs2_inode *ip = GFS2_I(inode); 1169 struct gfs2_inode *ip = GFS2_I(inode);
1168 struct gfs2_sbd *sdp = GFS2_SB(inode); 1170 struct gfs2_sbd *sdp = GFS2_SB(inode);
1169 struct buffer_head *dibh; 1171 struct buffer_head *dibh;
1170 struct gfs2_qadata *qa = NULL;
1171 int error; 1172 int error;
1172 int unstuff = 0; 1173 int unstuff = 0;
1173 1174
1174 if (gfs2_is_stuffed(ip) && 1175 if (gfs2_is_stuffed(ip) &&
1175 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1176 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1176 qa = gfs2_qadata_get(ip);
1177 if (qa == NULL)
1178 return -ENOMEM;
1179
1180 error = gfs2_quota_lock_check(ip); 1177 error = gfs2_quota_lock_check(ip);
1181 if (error) 1178 if (error)
1182 goto do_grow_alloc_put; 1179 return error;
1183 1180
1184 error = gfs2_inplace_reserve(ip, 1); 1181 error = gfs2_inplace_reserve(ip, 1);
1185 if (error) 1182 if (error)
@@ -1214,8 +1211,6 @@ do_grow_release:
1214 gfs2_inplace_release(ip); 1211 gfs2_inplace_release(ip);
1215do_grow_qunlock: 1212do_grow_qunlock:
1216 gfs2_quota_unlock(ip); 1213 gfs2_quota_unlock(ip);
1217do_grow_alloc_put:
1218 gfs2_qadata_put(ip);
1219 } 1214 }
1220 return error; 1215 return error;
1221} 1216}
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 0da8da2c991d..4fddb3c22d25 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -25,7 +25,7 @@
25/** 25/**
26 * gfs2_drevalidate - Check directory lookup consistency 26 * gfs2_drevalidate - Check directory lookup consistency
27 * @dentry: the mapping to check 27 * @dentry: the mapping to check
28 * @nd: 28 * @flags: lookup flags
29 * 29 *
30 * Check to make sure the lookup necessary to arrive at this inode from its 30 * Check to make sure the lookup necessary to arrive at this inode from its
31 * parent is still good. 31 * parent is still good.
@@ -33,7 +33,7 @@
33 * Returns: 1 if the dentry is ok, 0 if it isn't 33 * Returns: 1 if the dentry is ok, 0 if it isn't
34 */ 34 */
35 35
36static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) 36static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
37{ 37{
38 struct dentry *parent; 38 struct dentry *parent;
39 struct gfs2_sbd *sdp; 39 struct gfs2_sbd *sdp;
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
44 int error; 44 int error;
45 int had_lock = 0; 45 int had_lock = 0;
46 46
47 if (nd && nd->flags & LOOKUP_RCU) 47 if (flags & LOOKUP_RCU)
48 return -ECHILD; 48 return -ECHILD;
49 49
50 parent = dget_parent(dentry); 50 parent = dget_parent(dentry);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8aaeb07a07b5..259b088cfc4c 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -1854,14 +1854,9 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1854 if (!ht) 1854 if (!ht)
1855 return -ENOMEM; 1855 return -ENOMEM;
1856 1856
1857 if (!gfs2_qadata_get(dip)) {
1858 error = -ENOMEM;
1859 goto out;
1860 }
1861
1862 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1857 error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1863 if (error) 1858 if (error)
1864 goto out_put; 1859 goto out;
1865 1860
1866 /* Count the number of leaves */ 1861 /* Count the number of leaves */
1867 bh = leaf_bh; 1862 bh = leaf_bh;
@@ -1942,8 +1937,6 @@ out_rg_gunlock:
1942out_rlist: 1937out_rlist:
1943 gfs2_rlist_free(&rlist); 1938 gfs2_rlist_free(&rlist);
1944 gfs2_quota_unhold(dip); 1939 gfs2_quota_unhold(dip);
1945out_put:
1946 gfs2_qadata_put(dip);
1947out: 1940out:
1948 kfree(ht); 1941 kfree(ht);
1949 return error; 1942 return error;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 31b199f6efc1..9aa6af13823c 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -142,6 +142,7 @@ static const u32 fsflags_to_gfs2[32] = {
142 [7] = GFS2_DIF_NOATIME, 142 [7] = GFS2_DIF_NOATIME,
143 [12] = GFS2_DIF_EXHASH, 143 [12] = GFS2_DIF_EXHASH,
144 [14] = GFS2_DIF_INHERIT_JDATA, 144 [14] = GFS2_DIF_INHERIT_JDATA,
145 [17] = GFS2_DIF_TOPDIR,
145}; 146};
146 147
147static const u32 gfs2_to_fsflags[32] = { 148static const u32 gfs2_to_fsflags[32] = {
@@ -150,6 +151,7 @@ static const u32 gfs2_to_fsflags[32] = {
150 [gfs2fl_AppendOnly] = FS_APPEND_FL, 151 [gfs2fl_AppendOnly] = FS_APPEND_FL,
151 [gfs2fl_NoAtime] = FS_NOATIME_FL, 152 [gfs2fl_NoAtime] = FS_NOATIME_FL,
152 [gfs2fl_ExHash] = FS_INDEX_FL, 153 [gfs2fl_ExHash] = FS_INDEX_FL,
154 [gfs2fl_TopLevel] = FS_TOPDIR_FL,
153 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL, 155 [gfs2fl_InheritJdata] = FS_JOURNAL_DATA_FL,
154}; 156};
155 157
@@ -203,6 +205,7 @@ void gfs2_set_inode_flags(struct inode *inode)
203 GFS2_DIF_NOATIME| \ 205 GFS2_DIF_NOATIME| \
204 GFS2_DIF_SYNC| \ 206 GFS2_DIF_SYNC| \
205 GFS2_DIF_SYSTEM| \ 207 GFS2_DIF_SYSTEM| \
208 GFS2_DIF_TOPDIR| \
206 GFS2_DIF_INHERIT_JDATA) 209 GFS2_DIF_INHERIT_JDATA)
207 210
208/** 211/**
@@ -298,6 +301,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
298 301
299 gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags); 302 gfsflags = fsflags_cvt(fsflags_to_gfs2, fsflags);
300 if (!S_ISDIR(inode->i_mode)) { 303 if (!S_ISDIR(inode->i_mode)) {
304 gfsflags &= ~GFS2_DIF_TOPDIR;
301 if (gfsflags & GFS2_DIF_INHERIT_JDATA) 305 if (gfsflags & GFS2_DIF_INHERIT_JDATA)
302 gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA); 306 gfsflags ^= (GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA);
303 return do_gfs2_set_flags(filp, gfsflags, ~0); 307 return do_gfs2_set_flags(filp, gfsflags, ~0);
@@ -366,7 +370,6 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
366 u64 pos = page->index << PAGE_CACHE_SHIFT; 370 u64 pos = page->index << PAGE_CACHE_SHIFT;
367 unsigned int data_blocks, ind_blocks, rblocks; 371 unsigned int data_blocks, ind_blocks, rblocks;
368 struct gfs2_holder gh; 372 struct gfs2_holder gh;
369 struct gfs2_qadata *qa;
370 loff_t size; 373 loff_t size;
371 int ret; 374 int ret;
372 375
@@ -376,6 +379,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
376 */ 379 */
377 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 380 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
378 381
382 ret = gfs2_rs_alloc(ip);
383 if (ret)
384 return ret;
385
386 atomic_set(&ip->i_res->rs_sizehint,
387 PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift);
388
379 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); 389 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
380 ret = gfs2_glock_nq(&gh); 390 ret = gfs2_glock_nq(&gh);
381 if (ret) 391 if (ret)
@@ -393,14 +403,13 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
393 goto out_unlock; 403 goto out_unlock;
394 } 404 }
395 405
396 ret = -ENOMEM; 406 ret = gfs2_rindex_update(sdp);
397 qa = gfs2_qadata_get(ip); 407 if (ret)
398 if (qa == NULL)
399 goto out_unlock; 408 goto out_unlock;
400 409
401 ret = gfs2_quota_lock_check(ip); 410 ret = gfs2_quota_lock_check(ip);
402 if (ret) 411 if (ret)
403 goto out_alloc_put; 412 goto out_unlock;
404 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 413 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
405 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); 414 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
406 if (ret) 415 if (ret)
@@ -447,8 +456,6 @@ out_trans_fail:
447 gfs2_inplace_release(ip); 456 gfs2_inplace_release(ip);
448out_quota_unlock: 457out_quota_unlock:
449 gfs2_quota_unlock(ip); 458 gfs2_quota_unlock(ip);
450out_alloc_put:
451 gfs2_qadata_put(ip);
452out_unlock: 459out_unlock:
453 gfs2_glock_dq(&gh); 460 gfs2_glock_dq(&gh);
454out: 461out:
@@ -567,16 +574,14 @@ fail:
567 574
568static int gfs2_release(struct inode *inode, struct file *file) 575static int gfs2_release(struct inode *inode, struct file *file)
569{ 576{
570 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; 577 struct gfs2_inode *ip = GFS2_I(inode);
571 struct gfs2_file *fp;
572 578
573 fp = file->private_data; 579 kfree(file->private_data);
574 file->private_data = NULL; 580 file->private_data = NULL;
575 581
576 if (gfs2_assert_warn(sdp, fp)) 582 if ((file->f_mode & FMODE_WRITE) &&
577 return -EIO; 583 (atomic_read(&inode->i_writecount) == 1))
578 584 gfs2_rs_delete(ip);
579 kfree(fp);
580 585
581 return 0; 586 return 0;
582} 587}
@@ -653,12 +658,20 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
653 unsigned long nr_segs, loff_t pos) 658 unsigned long nr_segs, loff_t pos)
654{ 659{
655 struct file *file = iocb->ki_filp; 660 struct file *file = iocb->ki_filp;
661 size_t writesize = iov_length(iov, nr_segs);
662 struct dentry *dentry = file->f_dentry;
663 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
664 struct gfs2_sbd *sdp;
665 int ret;
656 666
667 sdp = GFS2_SB(file->f_mapping->host);
668 ret = gfs2_rs_alloc(ip);
669 if (ret)
670 return ret;
671
672 atomic_set(&ip->i_res->rs_sizehint, writesize >> sdp->sd_sb.sb_bsize_shift);
657 if (file->f_flags & O_APPEND) { 673 if (file->f_flags & O_APPEND) {
658 struct dentry *dentry = file->f_dentry;
659 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
660 struct gfs2_holder gh; 674 struct gfs2_holder gh;
661 int ret;
662 675
663 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 676 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
664 if (ret) 677 if (ret)
@@ -751,7 +764,6 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
751 struct gfs2_inode *ip = GFS2_I(inode); 764 struct gfs2_inode *ip = GFS2_I(inode);
752 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 765 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
753 loff_t bytes, max_bytes; 766 loff_t bytes, max_bytes;
754 struct gfs2_qadata *qa;
755 int error; 767 int error;
756 const loff_t pos = offset; 768 const loff_t pos = offset;
757 const loff_t count = len; 769 const loff_t count = len;
@@ -774,11 +786,17 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
774 if (bytes == 0) 786 if (bytes == 0)
775 bytes = sdp->sd_sb.sb_bsize; 787 bytes = sdp->sd_sb.sb_bsize;
776 788
789 error = gfs2_rs_alloc(ip);
790 if (error)
791 return error;
792
777 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); 793 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
778 error = gfs2_glock_nq(&ip->i_gh); 794 error = gfs2_glock_nq(&ip->i_gh);
779 if (unlikely(error)) 795 if (unlikely(error))
780 goto out_uninit; 796 goto out_uninit;
781 797
798 atomic_set(&ip->i_res->rs_sizehint, len >> sdp->sd_sb.sb_bsize_shift);
799
782 while (len > 0) { 800 while (len > 0) {
783 if (len < bytes) 801 if (len < bytes)
784 bytes = len; 802 bytes = len;
@@ -787,15 +805,9 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
787 offset += bytes; 805 offset += bytes;
788 continue; 806 continue;
789 } 807 }
790 qa = gfs2_qadata_get(ip);
791 if (!qa) {
792 error = -ENOMEM;
793 goto out_unlock;
794 }
795
796 error = gfs2_quota_lock_check(ip); 808 error = gfs2_quota_lock_check(ip);
797 if (error) 809 if (error)
798 goto out_alloc_put; 810 goto out_unlock;
799 811
800retry: 812retry:
801 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 813 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
@@ -835,7 +847,6 @@ retry:
835 offset += max_bytes; 847 offset += max_bytes;
836 gfs2_inplace_release(ip); 848 gfs2_inplace_release(ip);
837 gfs2_quota_unlock(ip); 849 gfs2_quota_unlock(ip);
838 gfs2_qadata_put(ip);
839 } 850 }
840 851
841 if (error == 0) 852 if (error == 0)
@@ -846,8 +857,6 @@ out_trans_fail:
846 gfs2_inplace_release(ip); 857 gfs2_inplace_release(ip);
847out_qunlock: 858out_qunlock:
848 gfs2_quota_unlock(ip); 859 gfs2_quota_unlock(ip);
849out_alloc_put:
850 gfs2_qadata_put(ip);
851out_unlock: 860out_unlock:
852 gfs2_glock_dq(&ip->i_gh); 861 gfs2_glock_dq(&ip->i_gh);
853out_uninit: 862out_uninit:
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index dab2526071cc..1ed81f40da0d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -46,10 +46,11 @@
46#include "trace_gfs2.h" 46#include "trace_gfs2.h"
47 47
48struct gfs2_glock_iter { 48struct gfs2_glock_iter {
49 int hash; /* hash bucket index */ 49 int hash; /* hash bucket index */
50 struct gfs2_sbd *sdp; /* incore superblock */ 50 unsigned nhash; /* Index within current bucket */
51 struct gfs2_glock *gl; /* current glock struct */ 51 struct gfs2_sbd *sdp; /* incore superblock */
52 char string[512]; /* scratch space */ 52 struct gfs2_glock *gl; /* current glock struct */
53 loff_t last_pos; /* last position */
53}; 54};
54 55
55typedef void (*glock_examiner) (struct gfs2_glock * gl); 56typedef void (*glock_examiner) (struct gfs2_glock * gl);
@@ -767,6 +768,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
767 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0; 768 gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
768 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0; 769 gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
769 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb)); 770 memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));
771 memset(gl->gl_lvb, 0, 32 * sizeof(char));
770 gl->gl_lksb.sb_lvbptr = gl->gl_lvb; 772 gl->gl_lksb.sb_lvbptr = gl->gl_lvb;
771 gl->gl_tchange = jiffies; 773 gl->gl_tchange = jiffies;
772 gl->gl_object = NULL; 774 gl->gl_object = NULL;
@@ -948,9 +950,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
948 va_start(args, fmt); 950 va_start(args, fmt);
949 951
950 if (seq) { 952 if (seq) {
951 struct gfs2_glock_iter *gi = seq->private; 953 seq_vprintf(seq, fmt, args);
952 vsprintf(gi->string, fmt, args);
953 seq_printf(seq, gi->string);
954 } else { 954 } else {
955 vaf.fmt = fmt; 955 vaf.fmt = fmt;
956 vaf.va = &args; 956 vaf.va = &args;
@@ -1854,8 +1854,14 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
1854 gl = gi->gl; 1854 gl = gi->gl;
1855 if (gl) { 1855 if (gl) {
1856 gi->gl = glock_hash_next(gl); 1856 gi->gl = glock_hash_next(gl);
1857 gi->nhash++;
1857 } else { 1858 } else {
1859 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1860 rcu_read_unlock();
1861 return 1;
1862 }
1858 gi->gl = glock_hash_chain(gi->hash); 1863 gi->gl = glock_hash_chain(gi->hash);
1864 gi->nhash = 0;
1859 } 1865 }
1860 while (gi->gl == NULL) { 1866 while (gi->gl == NULL) {
1861 gi->hash++; 1867 gi->hash++;
@@ -1864,6 +1870,7 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
1864 return 1; 1870 return 1;
1865 } 1871 }
1866 gi->gl = glock_hash_chain(gi->hash); 1872 gi->gl = glock_hash_chain(gi->hash);
1873 gi->nhash = 0;
1867 } 1874 }
1868 /* Skip entries for other sb and dead entries */ 1875 /* Skip entries for other sb and dead entries */
1869 } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); 1876 } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
@@ -1876,7 +1883,12 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1876 struct gfs2_glock_iter *gi = seq->private; 1883 struct gfs2_glock_iter *gi = seq->private;
1877 loff_t n = *pos; 1884 loff_t n = *pos;
1878 1885
1879 gi->hash = 0; 1886 if (gi->last_pos <= *pos)
1887 n = gi->nhash + (*pos - gi->last_pos);
1888 else
1889 gi->hash = 0;
1890
1891 gi->nhash = 0;
1880 rcu_read_lock(); 1892 rcu_read_lock();
1881 1893
1882 do { 1894 do {
@@ -1884,6 +1896,7 @@ static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1884 return NULL; 1896 return NULL;
1885 } while (n--); 1897 } while (n--);
1886 1898
1899 gi->last_pos = *pos;
1887 return gi->gl; 1900 return gi->gl;
1888} 1901}
1889 1902
@@ -1893,7 +1906,7 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1893 struct gfs2_glock_iter *gi = seq->private; 1906 struct gfs2_glock_iter *gi = seq->private;
1894 1907
1895 (*pos)++; 1908 (*pos)++;
1896 1909 gi->last_pos = *pos;
1897 if (gfs2_glock_iter_next(gi)) 1910 if (gfs2_glock_iter_next(gi))
1898 return NULL; 1911 return NULL;
1899 1912
@@ -1964,6 +1977,8 @@ static const struct seq_operations gfs2_sbstats_seq_ops = {
1964 .show = gfs2_sbstats_seq_show, 1977 .show = gfs2_sbstats_seq_show,
1965}; 1978};
1966 1979
1980#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)
1981
1967static int gfs2_glocks_open(struct inode *inode, struct file *file) 1982static int gfs2_glocks_open(struct inode *inode, struct file *file)
1968{ 1983{
1969 int ret = seq_open_private(file, &gfs2_glock_seq_ops, 1984 int ret = seq_open_private(file, &gfs2_glock_seq_ops,
@@ -1972,6 +1987,9 @@ static int gfs2_glocks_open(struct inode *inode, struct file *file)
1972 struct seq_file *seq = file->private_data; 1987 struct seq_file *seq = file->private_data;
1973 struct gfs2_glock_iter *gi = seq->private; 1988 struct gfs2_glock_iter *gi = seq->private;
1974 gi->sdp = inode->i_private; 1989 gi->sdp = inode->i_private;
1990 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
1991 if (seq->buf)
1992 seq->size = GFS2_SEQ_GOODSIZE;
1975 } 1993 }
1976 return ret; 1994 return ret;
1977} 1995}
@@ -1984,6 +2002,9 @@ static int gfs2_glstats_open(struct inode *inode, struct file *file)
1984 struct seq_file *seq = file->private_data; 2002 struct seq_file *seq = file->private_data;
1985 struct gfs2_glock_iter *gi = seq->private; 2003 struct gfs2_glock_iter *gi = seq->private;
1986 gi->sdp = inode->i_private; 2004 gi->sdp = inode->i_private;
2005 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
2006 if (seq->buf)
2007 seq->size = GFS2_SEQ_GOODSIZE;
1987 } 2008 }
1988 return ret; 2009 return ret;
1989} 2010}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 67fd6beffece..aaecc8085fc5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -84,17 +84,22 @@ struct gfs2_rgrpd {
84 u32 rd_data; /* num of data blocks in rgrp */ 84 u32 rd_data; /* num of data blocks in rgrp */
85 u32 rd_bitbytes; /* number of bytes in data bitmaps */ 85 u32 rd_bitbytes; /* number of bytes in data bitmaps */
86 u32 rd_free; 86 u32 rd_free;
87 u32 rd_reserved; /* number of blocks reserved */
87 u32 rd_free_clone; 88 u32 rd_free_clone;
88 u32 rd_dinodes; 89 u32 rd_dinodes;
89 u64 rd_igeneration; 90 u64 rd_igeneration;
90 struct gfs2_bitmap *rd_bits; 91 struct gfs2_bitmap *rd_bits;
91 struct gfs2_sbd *rd_sbd; 92 struct gfs2_sbd *rd_sbd;
93 struct gfs2_rgrp_lvb *rd_rgl;
92 u32 rd_last_alloc; 94 u32 rd_last_alloc;
93 u32 rd_flags; 95 u32 rd_flags;
94#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ 96#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */
95#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ 97#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */
96#define GFS2_RDF_ERROR 0x40000000 /* error in rg */ 98#define GFS2_RDF_ERROR 0x40000000 /* error in rg */
97#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ 99#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
100 spinlock_t rd_rsspin; /* protects reservation related vars */
101 struct rb_root rd_rstree; /* multi-block reservation tree */
102 u32 rd_rs_cnt; /* count of current reservations */
98}; 103};
99 104
100enum gfs2_state_bits { 105enum gfs2_state_bits {
@@ -232,6 +237,38 @@ struct gfs2_holder {
232 unsigned long gh_ip; 237 unsigned long gh_ip;
233}; 238};
234 239
240/* Resource group multi-block reservation, in order of appearance:
241
242 Step 1. Function prepares to write, allocates a mb, sets the size hint.
243 Step 2. User calls inplace_reserve to target an rgrp, sets the rgrp info
244 Step 3. Function get_local_rgrp locks the rgrp, determines which bits to use
245 Step 4. Bits are assigned from the rgrp based on either the reservation
246 or wherever it can.
247*/
248
249struct gfs2_blkreserv {
250 /* components used during write (step 1): */
251 atomic_t rs_sizehint; /* hint of the write size */
252
253 /* components used during inplace_reserve (step 2): */
254 u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
255
256 /* components used during get_local_rgrp (step 3): */
257 struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */
258 struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */
259 struct rb_node rs_node; /* link to other block reservations */
260
261 /* components used during block searches and assignments (step 4): */
262 struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */
263 u32 rs_biblk; /* start block relative to the bi */
264 u32 rs_free; /* how many blocks are still free */
265
266 /* ancillary quota stuff */
267 struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS];
268 struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS];
269 unsigned int rs_qa_qd_num;
270};
271
235enum { 272enum {
236 GLF_LOCK = 1, 273 GLF_LOCK = 1,
237 GLF_DEMOTE = 3, 274 GLF_DEMOTE = 3,
@@ -289,18 +326,6 @@ struct gfs2_glock {
289 326
290#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 327#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
291 328
292struct gfs2_qadata { /* quota allocation data */
293 /* Quota stuff */
294 struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
295 struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
296 unsigned int qa_qd_num;
297};
298
299struct gfs2_blkreserv {
300 u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
301 struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
302};
303
304enum { 329enum {
305 GIF_INVALID = 0, 330 GIF_INVALID = 0,
306 GIF_QD_LOCKED = 1, 331 GIF_QD_LOCKED = 1,
@@ -308,7 +333,6 @@ enum {
308 GIF_SW_PAGED = 3, 333 GIF_SW_PAGED = 3,
309}; 334};
310 335
311
312struct gfs2_inode { 336struct gfs2_inode {
313 struct inode i_inode; 337 struct inode i_inode;
314 u64 i_no_addr; 338 u64 i_no_addr;
@@ -319,8 +343,7 @@ struct gfs2_inode {
319 struct gfs2_glock *i_gl; /* Move into i_gh? */ 343 struct gfs2_glock *i_gl; /* Move into i_gh? */
320 struct gfs2_holder i_iopen_gh; 344 struct gfs2_holder i_iopen_gh;
321 struct gfs2_holder i_gh; /* for prepare/commit_write only */ 345 struct gfs2_holder i_gh; /* for prepare/commit_write only */
322 struct gfs2_qadata *i_qadata; /* quota allocation data */ 346 struct gfs2_blkreserv *i_res; /* rgrp multi-block reservation */
323 struct gfs2_blkreserv *i_res; /* resource group block reservation */
324 struct gfs2_rgrpd *i_rgd; 347 struct gfs2_rgrpd *i_rgd;
325 u64 i_goal; /* goal block for allocations */ 348 u64 i_goal; /* goal block for allocations */
326 struct rw_semaphore i_rw_mutex; 349 struct rw_semaphore i_rw_mutex;
@@ -473,6 +496,7 @@ struct gfs2_args {
473 unsigned int ar_discard:1; /* discard requests */ 496 unsigned int ar_discard:1; /* discard requests */
474 unsigned int ar_errors:2; /* errors=withdraw | panic */ 497 unsigned int ar_errors:2; /* errors=withdraw | panic */
475 unsigned int ar_nobarrier:1; /* do not send barriers */ 498 unsigned int ar_nobarrier:1; /* do not send barriers */
499 unsigned int ar_rgrplvb:1; /* use lvbs for rgrp info */
476 int ar_commit; /* Commit interval */ 500 int ar_commit; /* Commit interval */
477 int ar_statfs_quantum; /* The fast statfs interval */ 501 int ar_statfs_quantum; /* The fast statfs interval */
478 int ar_quota_quantum; /* The quota interval */ 502 int ar_quota_quantum; /* The quota interval */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index a9ba2444e077..4ce22e547308 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -521,12 +521,13 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
521 int error; 521 int error;
522 522
523 munge_mode_uid_gid(dip, &mode, &uid, &gid); 523 munge_mode_uid_gid(dip, &mode, &uid, &gid);
524 if (!gfs2_qadata_get(dip)) 524 error = gfs2_rindex_update(sdp);
525 return -ENOMEM; 525 if (error)
526 return error;
526 527
527 error = gfs2_quota_lock(dip, uid, gid); 528 error = gfs2_quota_lock(dip, uid, gid);
528 if (error) 529 if (error)
529 goto out; 530 return error;
530 531
531 error = gfs2_quota_check(dip, uid, gid); 532 error = gfs2_quota_check(dip, uid, gid);
532 if (error) 533 if (error)
@@ -542,8 +543,6 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
542 543
543out_quota: 544out_quota:
544 gfs2_quota_unlock(dip); 545 gfs2_quota_unlock(dip);
545out:
546 gfs2_qadata_put(dip);
547 return error; 546 return error;
548} 547}
549 548
@@ -551,14 +550,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
551 struct gfs2_inode *ip) 550 struct gfs2_inode *ip)
552{ 551{
553 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 552 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
554 struct gfs2_qadata *qa;
555 int alloc_required; 553 int alloc_required;
556 struct buffer_head *dibh; 554 struct buffer_head *dibh;
557 int error; 555 int error;
558 556
559 qa = gfs2_qadata_get(dip); 557 error = gfs2_rindex_update(sdp);
560 if (!qa) 558 if (error)
561 return -ENOMEM; 559 return error;
562 560
563 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 561 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
564 if (error) 562 if (error)
@@ -605,13 +603,13 @@ fail_end_trans:
605 gfs2_trans_end(sdp); 603 gfs2_trans_end(sdp);
606 604
607fail_ipreserv: 605fail_ipreserv:
608 gfs2_inplace_release(dip); 606 if (alloc_required)
607 gfs2_inplace_release(dip);
609 608
610fail_quota_locks: 609fail_quota_locks:
611 gfs2_quota_unlock(dip); 610 gfs2_quota_unlock(dip);
612 611
613fail: 612fail:
614 gfs2_qadata_put(dip);
615 return error; 613 return error;
616} 614}
617 615
@@ -657,7 +655,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
657 const struct qstr *name = &dentry->d_name; 655 const struct qstr *name = &dentry->d_name;
658 struct gfs2_holder ghs[2]; 656 struct gfs2_holder ghs[2];
659 struct inode *inode = NULL; 657 struct inode *inode = NULL;
660 struct gfs2_inode *dip = GFS2_I(dir); 658 struct gfs2_inode *dip = GFS2_I(dir), *ip;
661 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 659 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
662 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; 660 struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 };
663 int error; 661 int error;
@@ -667,6 +665,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
667 if (!name->len || name->len > GFS2_FNAMESIZE) 665 if (!name->len || name->len > GFS2_FNAMESIZE)
668 return -ENAMETOOLONG; 666 return -ENAMETOOLONG;
669 667
668 /* We need a reservation to allocate the new dinode block. The
669 directory ip temporarily points to the reservation, but this is
670 being done to get a set of contiguous blocks for the new dinode.
671 Since this is a create, we don't have a sizehint yet, so it will
672 have to use the minimum reservation size. */
673 error = gfs2_rs_alloc(dip);
674 if (error)
675 return error;
676
670 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 677 error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
671 if (error) 678 if (error)
672 goto fail; 679 goto fail;
@@ -700,19 +707,29 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
700 if (IS_ERR(inode)) 707 if (IS_ERR(inode))
701 goto fail_gunlock2; 708 goto fail_gunlock2;
702 709
703 error = gfs2_inode_refresh(GFS2_I(inode)); 710 ip = GFS2_I(inode);
711 error = gfs2_inode_refresh(ip);
704 if (error) 712 if (error)
705 goto fail_gunlock2; 713 goto fail_gunlock2;
706 714
715 /* The newly created inode needs a reservation so it can allocate
716 xattrs. At the same time, we want new blocks allocated to the new
717 dinode to be as contiguous as possible. Since we allocated the
718 dinode block under the directory's reservation, we transfer
719 ownership of that reservation to the new inode. The directory
720 doesn't need a reservation unless it needs a new allocation. */
721 ip->i_res = dip->i_res;
722 dip->i_res = NULL;
723
707 error = gfs2_acl_create(dip, inode); 724 error = gfs2_acl_create(dip, inode);
708 if (error) 725 if (error)
709 goto fail_gunlock2; 726 goto fail_gunlock2;
710 727
711 error = gfs2_security_init(dip, GFS2_I(inode), name); 728 error = gfs2_security_init(dip, ip, name);
712 if (error) 729 if (error)
713 goto fail_gunlock2; 730 goto fail_gunlock2;
714 731
715 error = link_dinode(dip, name, GFS2_I(inode)); 732 error = link_dinode(dip, name, ip);
716 if (error) 733 if (error)
717 goto fail_gunlock2; 734 goto fail_gunlock2;
718 735
@@ -722,10 +739,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
722 gfs2_trans_end(sdp); 739 gfs2_trans_end(sdp);
723 /* Check if we reserved space in the rgrp. Function link_dinode may 740 /* Check if we reserved space in the rgrp. Function link_dinode may
724 not, depending on whether alloc is required. */ 741 not, depending on whether alloc is required. */
725 if (dip->i_res) 742 if (gfs2_mb_reserved(dip))
726 gfs2_inplace_release(dip); 743 gfs2_inplace_release(dip);
727 gfs2_quota_unlock(dip); 744 gfs2_quota_unlock(dip);
728 gfs2_qadata_put(dip);
729 mark_inode_dirty(inode); 745 mark_inode_dirty(inode);
730 gfs2_glock_dq_uninit_m(2, ghs); 746 gfs2_glock_dq_uninit_m(2, ghs);
731 d_instantiate(dentry, inode); 747 d_instantiate(dentry, inode);
@@ -740,6 +756,7 @@ fail_gunlock:
740 iput(inode); 756 iput(inode);
741 } 757 }
742fail: 758fail:
759 gfs2_rs_delete(dip);
743 if (bh) 760 if (bh)
744 brelse(bh); 761 brelse(bh);
745 return error; 762 return error;
@@ -755,11 +772,8 @@ fail:
755 */ 772 */
756 773
757static int gfs2_create(struct inode *dir, struct dentry *dentry, 774static int gfs2_create(struct inode *dir, struct dentry *dentry,
758 umode_t mode, struct nameidata *nd) 775 umode_t mode, bool excl)
759{ 776{
760 int excl = 0;
761 if (nd && (nd->flags & LOOKUP_EXCL))
762 excl = 1;
763 return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); 777 return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl);
764} 778}
765 779
@@ -775,7 +789,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
775 */ 789 */
776 790
777static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, 791static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
778 struct nameidata *nd) 792 unsigned int flags)
779{ 793{
780 struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0); 794 struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
781 if (inode && !IS_ERR(inode)) { 795 if (inode && !IS_ERR(inode)) {
@@ -819,6 +833,10 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
819 if (S_ISDIR(inode->i_mode)) 833 if (S_ISDIR(inode->i_mode))
820 return -EPERM; 834 return -EPERM;
821 835
836 error = gfs2_rs_alloc(dip);
837 if (error)
838 return error;
839
822 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 840 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
823 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 841 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
824 842
@@ -870,16 +888,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
870 error = 0; 888 error = 0;
871 889
872 if (alloc_required) { 890 if (alloc_required) {
873 struct gfs2_qadata *qa = gfs2_qadata_get(dip);
874
875 if (!qa) {
876 error = -ENOMEM;
877 goto out_gunlock;
878 }
879
880 error = gfs2_quota_lock_check(dip); 891 error = gfs2_quota_lock_check(dip);
881 if (error) 892 if (error)
882 goto out_alloc; 893 goto out_gunlock;
883 894
884 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres); 895 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
885 if (error) 896 if (error)
@@ -922,9 +933,6 @@ out_ipres:
922out_gunlock_q: 933out_gunlock_q:
923 if (alloc_required) 934 if (alloc_required)
924 gfs2_quota_unlock(dip); 935 gfs2_quota_unlock(dip);
925out_alloc:
926 if (alloc_required)
927 gfs2_qadata_put(dip);
928out_gunlock: 936out_gunlock:
929 gfs2_glock_dq(ghs + 1); 937 gfs2_glock_dq(ghs + 1);
930out_child: 938out_child:
@@ -1234,6 +1242,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1234 if (error) 1242 if (error)
1235 return error; 1243 return error;
1236 1244
1245 error = gfs2_rs_alloc(ndip);
1246 if (error)
1247 return error;
1248
1237 if (odip != ndip) { 1249 if (odip != ndip) {
1238 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 1250 error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
1239 0, &r_gh); 1251 0, &r_gh);
@@ -1357,16 +1369,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1357 goto out_gunlock; 1369 goto out_gunlock;
1358 1370
1359 if (alloc_required) { 1371 if (alloc_required) {
1360 struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
1361
1362 if (!qa) {
1363 error = -ENOMEM;
1364 goto out_gunlock;
1365 }
1366
1367 error = gfs2_quota_lock_check(ndip); 1372 error = gfs2_quota_lock_check(ndip);
1368 if (error) 1373 if (error)
1369 goto out_alloc; 1374 goto out_gunlock;
1370 1375
1371 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres); 1376 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
1372 if (error) 1377 if (error)
@@ -1427,9 +1432,6 @@ out_ipreserv:
1427out_gunlock_q: 1432out_gunlock_q:
1428 if (alloc_required) 1433 if (alloc_required)
1429 gfs2_quota_unlock(ndip); 1434 gfs2_quota_unlock(ndip);
1430out_alloc:
1431 if (alloc_required)
1432 gfs2_qadata_put(ndip);
1433out_gunlock: 1435out_gunlock:
1434 while (x--) { 1436 while (x--) {
1435 gfs2_glock_dq(ghs + x); 1437 gfs2_glock_dq(ghs + x);
@@ -1590,12 +1592,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1590 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 1592 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1591 ogid = ngid = NO_QUOTA_CHANGE; 1593 ogid = ngid = NO_QUOTA_CHANGE;
1592 1594
1593 if (!gfs2_qadata_get(ip))
1594 return -ENOMEM;
1595
1596 error = gfs2_quota_lock(ip, nuid, ngid); 1595 error = gfs2_quota_lock(ip, nuid, ngid);
1597 if (error) 1596 if (error)
1598 goto out_alloc; 1597 return error;
1599 1598
1600 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { 1599 if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) {
1601 error = gfs2_quota_check(ip, nuid, ngid); 1600 error = gfs2_quota_check(ip, nuid, ngid);
@@ -1621,8 +1620,6 @@ out_end_trans:
1621 gfs2_trans_end(sdp); 1620 gfs2_trans_end(sdp);
1622out_gunlock_q: 1621out_gunlock_q:
1623 gfs2_quota_unlock(ip); 1622 gfs2_quota_unlock(ip);
1624out_alloc:
1625 gfs2_qadata_put(ip);
1626 return error; 1623 return error;
1627} 1624}
1628 1625
@@ -1644,6 +1641,10 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
1644 struct gfs2_holder i_gh; 1641 struct gfs2_holder i_gh;
1645 int error; 1642 int error;
1646 1643
1644 error = gfs2_rs_alloc(ip);
1645 if (error)
1646 return error;
1647
1647 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 1648 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1648 if (error) 1649 if (error)
1649 return error; 1650 return error;
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 852c1be1dd3b..8ff95a2d54ee 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -401,9 +401,14 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
401 goto out; 401 goto out;
402 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 402 set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
403 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); 403 set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
404 gfs2_meta_check(sdp, bd->bd_bh);
405 gfs2_pin(sdp, bd->bd_bh);
406 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; 404 mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
405 if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) {
406 printk(KERN_ERR
407 "Attempting to add uninitialised block to journal (inplace block=%lld)\n",
408 (unsigned long long)bd->bd_bh->b_blocknr);
409 BUG();
410 }
411 gfs2_pin(sdp, bd->bd_bh);
407 mh->__pad0 = cpu_to_be64(0); 412 mh->__pad0 = cpu_to_be64(0);
408 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); 413 mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
409 sdp->sd_log_num_buf++; 414 sdp->sd_log_num_buf++;
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 6cdb0f2a1b09..e04d0e09ee7b 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -43,7 +43,6 @@ static void gfs2_init_inode_once(void *foo)
43 inode_init_once(&ip->i_inode); 43 inode_init_once(&ip->i_inode);
44 init_rwsem(&ip->i_rw_mutex); 44 init_rwsem(&ip->i_rw_mutex);
45 INIT_LIST_HEAD(&ip->i_trunc_list); 45 INIT_LIST_HEAD(&ip->i_trunc_list);
46 ip->i_qadata = NULL;
47 ip->i_res = NULL; 46 ip->i_res = NULL;
48 ip->i_hash_cache = NULL; 47 ip->i_hash_cache = NULL;
49} 48}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 6c1e5d1c404a..3a56c8d94de0 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -213,8 +213,10 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
213 struct gfs2_sbd *sdp = gl->gl_sbd; 213 struct gfs2_sbd *sdp = gl->gl_sbd;
214 struct buffer_head *bh; 214 struct buffer_head *bh;
215 215
216 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 216 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
217 *bhp = NULL;
217 return -EIO; 218 return -EIO;
219 }
218 220
219 *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); 221 *bhp = bh = gfs2_getbuf(gl, blkno, CREATE);
220 222
@@ -235,6 +237,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
235 if (tr && tr->tr_touched) 237 if (tr && tr->tr_touched)
236 gfs2_io_error_bh(sdp, bh); 238 gfs2_io_error_bh(sdp, bh);
237 brelse(bh); 239 brelse(bh);
240 *bhp = NULL;
238 return -EIO; 241 return -EIO;
239 } 242 }
240 243
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index b8c250fc4922..e5af9dc420ef 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1118,20 +1118,33 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1118 } 1118 }
1119 1119
1120 error = init_names(sdp, silent); 1120 error = init_names(sdp, silent);
1121 if (error) 1121 if (error) {
1122 goto fail; 1122 /* In this case, we haven't initialized sysfs, so we have to
1123 manually free the sdp. */
1124 free_percpu(sdp->sd_lkstats);
1125 kfree(sdp);
1126 sb->s_fs_info = NULL;
1127 return error;
1128 }
1123 1129
1124 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name); 1130 snprintf(sdp->sd_fsname, GFS2_FSNAME_LEN, "%s", sdp->sd_table_name);
1125 1131
1126 gfs2_create_debugfs_file(sdp);
1127
1128 error = gfs2_sys_fs_add(sdp); 1132 error = gfs2_sys_fs_add(sdp);
1133 /*
1134 * If we hit an error here, gfs2_sys_fs_add will have called function
1135 * kobject_put which causes the sysfs usage count to go to zero, which
1136 * causes sysfs to call function gfs2_sbd_release, which frees sdp.
1137 * Subsequent error paths here will call gfs2_sys_fs_del, which also
1138 * kobject_put to free sdp.
1139 */
1129 if (error) 1140 if (error)
1130 goto fail; 1141 return error;
1142
1143 gfs2_create_debugfs_file(sdp);
1131 1144
1132 error = gfs2_lm_mount(sdp, silent); 1145 error = gfs2_lm_mount(sdp, silent);
1133 if (error) 1146 if (error)
1134 goto fail_sys; 1147 goto fail_debug;
1135 1148
1136 error = init_locking(sdp, &mount_gh, DO); 1149 error = init_locking(sdp, &mount_gh, DO);
1137 if (error) 1150 if (error)
@@ -1215,12 +1228,12 @@ fail_locking:
1215fail_lm: 1228fail_lm:
1216 gfs2_gl_hash_clear(sdp); 1229 gfs2_gl_hash_clear(sdp);
1217 gfs2_lm_unmount(sdp); 1230 gfs2_lm_unmount(sdp);
1218fail_sys: 1231fail_debug:
1219 gfs2_sys_fs_del(sdp);
1220fail:
1221 gfs2_delete_debugfs_file(sdp); 1232 gfs2_delete_debugfs_file(sdp);
1222 free_percpu(sdp->sd_lkstats); 1233 free_percpu(sdp->sd_lkstats);
1223 kfree(sdp); 1234 /* gfs2_sys_fs_del must be the last thing we do, since it causes
1235 * sysfs to call function gfs2_sbd_release, which frees sdp. */
1236 gfs2_sys_fs_del(sdp);
1224 sb->s_fs_info = NULL; 1237 sb->s_fs_info = NULL;
1225 return error; 1238 return error;
1226} 1239}
@@ -1286,7 +1299,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1286 error = -EBUSY; 1299 error = -EBUSY;
1287 goto error_bdev; 1300 goto error_bdev;
1288 } 1301 }
1289 s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev); 1302 s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev);
1290 mutex_unlock(&bdev->bd_fsfreeze_mutex); 1303 mutex_unlock(&bdev->bd_fsfreeze_mutex);
1291 error = PTR_ERR(s); 1304 error = PTR_ERR(s);
1292 if (IS_ERR(s)) 1305 if (IS_ERR(s))
@@ -1316,7 +1329,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
1316 } else { 1329 } else {
1317 char b[BDEVNAME_SIZE]; 1330 char b[BDEVNAME_SIZE];
1318 1331
1319 s->s_flags = flags;
1320 s->s_mode = mode; 1332 s->s_mode = mode;
1321 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 1333 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1322 sb_set_blocksize(s, block_size(bdev)); 1334 sb_set_blocksize(s, block_size(bdev));
@@ -1360,7 +1372,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
1360 dev_name, error); 1372 dev_name, error);
1361 return ERR_PTR(error); 1373 return ERR_PTR(error);
1362 } 1374 }
1363 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, 1375 s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
1364 path.dentry->d_inode->i_sb->s_bdev); 1376 path.dentry->d_inode->i_sb->s_bdev);
1365 path_put(&path); 1377 path_put(&path);
1366 if (IS_ERR(s)) { 1378 if (IS_ERR(s)) {
@@ -1390,10 +1402,9 @@ static void gfs2_kill_sb(struct super_block *sb)
1390 sdp->sd_root_dir = NULL; 1402 sdp->sd_root_dir = NULL;
1391 sdp->sd_master_dir = NULL; 1403 sdp->sd_master_dir = NULL;
1392 shrink_dcache_sb(sb); 1404 shrink_dcache_sb(sb);
1393 kill_block_super(sb);
1394 gfs2_delete_debugfs_file(sdp); 1405 gfs2_delete_debugfs_file(sdp);
1395 free_percpu(sdp->sd_lkstats); 1406 free_percpu(sdp->sd_lkstats);
1396 kfree(sdp); 1407 kill_block_super(sb);
1397} 1408}
1398 1409
1399struct file_system_type gfs2_fs_type = { 1410struct file_system_type gfs2_fs_type = {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index b97178e7d397..a3bde91645c2 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -494,11 +494,15 @@ static void qdsb_put(struct gfs2_quota_data *qd)
494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) 494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
495{ 495{
496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
497 struct gfs2_qadata *qa = ip->i_qadata; 497 struct gfs2_quota_data **qd;
498 struct gfs2_quota_data **qd = qa->qa_qd;
499 int error; 498 int error;
500 499
501 if (gfs2_assert_warn(sdp, !qa->qa_qd_num) || 500 if (ip->i_res == NULL)
501 gfs2_rs_alloc(ip);
502
503 qd = ip->i_res->rs_qa_qd;
504
505 if (gfs2_assert_warn(sdp, !ip->i_res->rs_qa_qd_num) ||
502 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) 506 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
503 return -EIO; 507 return -EIO;
504 508
@@ -508,20 +512,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
508 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); 512 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
509 if (error) 513 if (error)
510 goto out; 514 goto out;
511 qa->qa_qd_num++; 515 ip->i_res->rs_qa_qd_num++;
512 qd++; 516 qd++;
513 517
514 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); 518 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
515 if (error) 519 if (error)
516 goto out; 520 goto out;
517 qa->qa_qd_num++; 521 ip->i_res->rs_qa_qd_num++;
518 qd++; 522 qd++;
519 523
520 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 524 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
521 error = qdsb_get(sdp, QUOTA_USER, uid, qd); 525 error = qdsb_get(sdp, QUOTA_USER, uid, qd);
522 if (error) 526 if (error)
523 goto out; 527 goto out;
524 qa->qa_qd_num++; 528 ip->i_res->rs_qa_qd_num++;
525 qd++; 529 qd++;
526 } 530 }
527 531
@@ -529,7 +533,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
529 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); 533 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
530 if (error) 534 if (error)
531 goto out; 535 goto out;
532 qa->qa_qd_num++; 536 ip->i_res->rs_qa_qd_num++;
533 qd++; 537 qd++;
534 } 538 }
535 539
@@ -542,16 +546,17 @@ out:
542void gfs2_quota_unhold(struct gfs2_inode *ip) 546void gfs2_quota_unhold(struct gfs2_inode *ip)
543{ 547{
544 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 548 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
545 struct gfs2_qadata *qa = ip->i_qadata;
546 unsigned int x; 549 unsigned int x;
547 550
551 if (ip->i_res == NULL)
552 return;
548 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); 553 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
549 554
550 for (x = 0; x < qa->qa_qd_num; x++) { 555 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
551 qdsb_put(qa->qa_qd[x]); 556 qdsb_put(ip->i_res->rs_qa_qd[x]);
552 qa->qa_qd[x] = NULL; 557 ip->i_res->rs_qa_qd[x] = NULL;
553 } 558 }
554 qa->qa_qd_num = 0; 559 ip->i_res->rs_qa_qd_num = 0;
555} 560}
556 561
557static int sort_qd(const void *a, const void *b) 562static int sort_qd(const void *a, const void *b)
@@ -764,6 +769,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
764 unsigned int nalloc = 0, blocks; 769 unsigned int nalloc = 0, blocks;
765 int error; 770 int error;
766 771
772 error = gfs2_rs_alloc(ip);
773 if (error)
774 return error;
775
767 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 776 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
768 &data_blocks, &ind_blocks); 777 &data_blocks, &ind_blocks);
769 778
@@ -915,7 +924,6 @@ fail:
915int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) 924int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
916{ 925{
917 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 926 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
918 struct gfs2_qadata *qa = ip->i_qadata;
919 struct gfs2_quota_data *qd; 927 struct gfs2_quota_data *qd;
920 unsigned int x; 928 unsigned int x;
921 int error = 0; 929 int error = 0;
@@ -928,15 +936,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
928 sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 936 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
929 return 0; 937 return 0;
930 938
931 sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *), 939 sort(ip->i_res->rs_qa_qd, ip->i_res->rs_qa_qd_num,
932 sort_qd, NULL); 940 sizeof(struct gfs2_quota_data *), sort_qd, NULL);
933 941
934 for (x = 0; x < qa->qa_qd_num; x++) { 942 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
935 int force = NO_FORCE; 943 int force = NO_FORCE;
936 qd = qa->qa_qd[x]; 944 qd = ip->i_res->rs_qa_qd[x];
937 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) 945 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
938 force = FORCE; 946 force = FORCE;
939 error = do_glock(qd, force, &qa->qa_qd_ghs[x]); 947 error = do_glock(qd, force, &ip->i_res->rs_qa_qd_ghs[x]);
940 if (error) 948 if (error)
941 break; 949 break;
942 } 950 }
@@ -945,7 +953,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
945 set_bit(GIF_QD_LOCKED, &ip->i_flags); 953 set_bit(GIF_QD_LOCKED, &ip->i_flags);
946 else { 954 else {
947 while (x--) 955 while (x--)
948 gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); 956 gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
949 gfs2_quota_unhold(ip); 957 gfs2_quota_unhold(ip);
950 } 958 }
951 959
@@ -990,7 +998,6 @@ static int need_sync(struct gfs2_quota_data *qd)
990 998
991void gfs2_quota_unlock(struct gfs2_inode *ip) 999void gfs2_quota_unlock(struct gfs2_inode *ip)
992{ 1000{
993 struct gfs2_qadata *qa = ip->i_qadata;
994 struct gfs2_quota_data *qda[4]; 1001 struct gfs2_quota_data *qda[4];
995 unsigned int count = 0; 1002 unsigned int count = 0;
996 unsigned int x; 1003 unsigned int x;
@@ -998,14 +1005,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
998 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) 1005 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
999 goto out; 1006 goto out;
1000 1007
1001 for (x = 0; x < qa->qa_qd_num; x++) { 1008 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1002 struct gfs2_quota_data *qd; 1009 struct gfs2_quota_data *qd;
1003 int sync; 1010 int sync;
1004 1011
1005 qd = qa->qa_qd[x]; 1012 qd = ip->i_res->rs_qa_qd[x];
1006 sync = need_sync(qd); 1013 sync = need_sync(qd);
1007 1014
1008 gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]); 1015 gfs2_glock_dq_uninit(&ip->i_res->rs_qa_qd_ghs[x]);
1009 1016
1010 if (sync && qd_trylock(qd)) 1017 if (sync && qd_trylock(qd))
1011 qda[count++] = qd; 1018 qda[count++] = qd;
@@ -1038,7 +1045,6 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1038int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) 1045int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1039{ 1046{
1040 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1047 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1041 struct gfs2_qadata *qa = ip->i_qadata;
1042 struct gfs2_quota_data *qd; 1048 struct gfs2_quota_data *qd;
1043 s64 value; 1049 s64 value;
1044 unsigned int x; 1050 unsigned int x;
@@ -1050,8 +1056,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1050 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 1056 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
1051 return 0; 1057 return 0;
1052 1058
1053 for (x = 0; x < qa->qa_qd_num; x++) { 1059 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1054 qd = qa->qa_qd[x]; 1060 qd = ip->i_res->rs_qa_qd[x];
1055 1061
1056 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1062 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1057 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) 1063 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1089,7 +1095,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1089void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 1095void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1090 u32 uid, u32 gid) 1096 u32 uid, u32 gid)
1091{ 1097{
1092 struct gfs2_qadata *qa = ip->i_qadata;
1093 struct gfs2_quota_data *qd; 1098 struct gfs2_quota_data *qd;
1094 unsigned int x; 1099 unsigned int x;
1095 1100
@@ -1098,8 +1103,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1098 if (ip->i_diskflags & GFS2_DIF_SYSTEM) 1103 if (ip->i_diskflags & GFS2_DIF_SYSTEM)
1099 return; 1104 return;
1100 1105
1101 for (x = 0; x < qa->qa_qd_num; x++) { 1106 for (x = 0; x < ip->i_res->rs_qa_qd_num; x++) {
1102 qd = qa->qa_qd[x]; 1107 qd = ip->i_res->rs_qa_qd[x];
1103 1108
1104 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1109 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1105 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { 1110 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
@@ -1108,7 +1113,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1108 } 1113 }
1109} 1114}
1110 1115
1111int gfs2_quota_sync(struct super_block *sb, int type, int wait) 1116int gfs2_quota_sync(struct super_block *sb, int type)
1112{ 1117{
1113 struct gfs2_sbd *sdp = sb->s_fs_info; 1118 struct gfs2_sbd *sdp = sb->s_fs_info;
1114 struct gfs2_quota_data **qda; 1119 struct gfs2_quota_data **qda;
@@ -1154,7 +1159,7 @@ int gfs2_quota_sync(struct super_block *sb, int type, int wait)
1154 1159
1155static int gfs2_quota_sync_timeo(struct super_block *sb, int type) 1160static int gfs2_quota_sync_timeo(struct super_block *sb, int type)
1156{ 1161{
1157 return gfs2_quota_sync(sb, type, 0); 1162 return gfs2_quota_sync(sb, type);
1158} 1163}
1159 1164
1160int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) 1165int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id)
@@ -1549,10 +1554,14 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1549 if (error) 1554 if (error)
1550 return error; 1555 return error;
1551 1556
1557 error = gfs2_rs_alloc(ip);
1558 if (error)
1559 goto out_put;
1560
1552 mutex_lock(&ip->i_inode.i_mutex); 1561 mutex_lock(&ip->i_inode.i_mutex);
1553 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh); 1562 error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
1554 if (error) 1563 if (error)
1555 goto out_put; 1564 goto out_unlockput;
1556 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 1565 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1557 if (error) 1566 if (error)
1558 goto out_q; 1567 goto out_q;
@@ -1609,8 +1618,9 @@ out_i:
1609 gfs2_glock_dq_uninit(&i_gh); 1618 gfs2_glock_dq_uninit(&i_gh);
1610out_q: 1619out_q:
1611 gfs2_glock_dq_uninit(&q_gh); 1620 gfs2_glock_dq_uninit(&q_gh);
1612out_put: 1621out_unlockput:
1613 mutex_unlock(&ip->i_inode.i_mutex); 1622 mutex_unlock(&ip->i_inode.i_mutex);
1623out_put:
1614 qd_put(qd); 1624 qd_put(qd);
1615 return error; 1625 return error;
1616} 1626}
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 90bf1c302a98..f25d98b87904 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -26,7 +26,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid);
26extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 26extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
27 u32 uid, u32 gid); 27 u32 uid, u32 gid);
28 28
29extern int gfs2_quota_sync(struct super_block *sb, int type, int wait); 29extern int gfs2_quota_sync(struct super_block *sb, int type);
30extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); 30extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id);
31 31
32extern int gfs2_quota_init(struct gfs2_sbd *sdp); 32extern int gfs2_quota_init(struct gfs2_sbd *sdp);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f74fb9bd1973..4d34887a601d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -35,6 +35,9 @@
35#define BFITNOENT ((u32)~0) 35#define BFITNOENT ((u32)~0)
36#define NO_BLOCK ((u64)~0) 36#define NO_BLOCK ((u64)~0)
37 37
38#define RSRV_CONTENTION_FACTOR 4
39#define RGRP_RSRV_MAX_CONTENDERS 2
40
38#if BITS_PER_LONG == 32 41#if BITS_PER_LONG == 32
39#define LBITMASK (0x55555555UL) 42#define LBITMASK (0x55555555UL)
40#define LBITSKIP55 (0x55555555UL) 43#define LBITSKIP55 (0x55555555UL)
@@ -178,6 +181,57 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
178} 181}
179 182
180/** 183/**
184 * rs_cmp - multi-block reservation range compare
185 * @blk: absolute file system block number of the new reservation
186 * @len: number of blocks in the new reservation
187 * @rs: existing reservation to compare against
188 *
189 * returns: 1 if the block range is beyond the reach of the reservation
190 * -1 if the block range is before the start of the reservation
191 * 0 if the block range overlaps with the reservation
192 */
193static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
194{
195 u64 startblk = gfs2_rs_startblk(rs);
196
197 if (blk >= startblk + rs->rs_free)
198 return 1;
199 if (blk + len - 1 < startblk)
200 return -1;
201 return 0;
202}
203
204/**
205 * rs_find - Find a rgrp multi-block reservation that contains a given block
206 * @rgd: The rgrp
207 * @rgblk: The block we're looking for, relative to the rgrp
208 */
209static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk)
210{
211 struct rb_node **newn;
212 int rc;
213 u64 fsblk = rgblk + rgd->rd_data0;
214
215 spin_lock(&rgd->rd_rsspin);
216 newn = &rgd->rd_rstree.rb_node;
217 while (*newn) {
218 struct gfs2_blkreserv *cur =
219 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
220 rc = rs_cmp(fsblk, 1, cur);
221 if (rc < 0)
222 newn = &((*newn)->rb_left);
223 else if (rc > 0)
224 newn = &((*newn)->rb_right);
225 else {
226 spin_unlock(&rgd->rd_rsspin);
227 return cur;
228 }
229 }
230 spin_unlock(&rgd->rd_rsspin);
231 return NULL;
232}
233
234/**
181 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing 235 * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing
182 * a block in a given allocation state. 236 * a block in a given allocation state.
183 * @buf: the buffer that holds the bitmaps 237 * @buf: the buffer that holds the bitmaps
@@ -417,6 +471,137 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
417 } 471 }
418} 472}
419 473
474/**
475 * gfs2_rs_alloc - make sure we have a reservation assigned to the inode
476 * @ip: the inode for this reservation
477 */
478int gfs2_rs_alloc(struct gfs2_inode *ip)
479{
480 int error = 0;
481 struct gfs2_blkreserv *res;
482
483 if (ip->i_res)
484 return 0;
485
486 res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS);
487 if (!res)
488 error = -ENOMEM;
489
490 down_write(&ip->i_rw_mutex);
491 if (ip->i_res)
492 kmem_cache_free(gfs2_rsrv_cachep, res);
493 else
494 ip->i_res = res;
495 up_write(&ip->i_rw_mutex);
496 return error;
497}
498
499static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs)
500{
501 gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n",
502 rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk,
503 rs->rs_free);
504}
505
506/**
507 * __rs_deltree - remove a multi-block reservation from the rgd tree
508 * @rs: The reservation to remove
509 *
510 */
511static void __rs_deltree(struct gfs2_blkreserv *rs)
512{
513 struct gfs2_rgrpd *rgd;
514
515 if (!gfs2_rs_active(rs))
516 return;
517
518 rgd = rs->rs_rgd;
519 /* We can't do this: The reason is that when the rgrp is invalidated,
520 it's in the "middle" of acquiring the glock, but the HOLDER bit
521 isn't set yet:
522 BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/
523 trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL);
524
525 if (!RB_EMPTY_ROOT(&rgd->rd_rstree))
526 rb_erase(&rs->rs_node, &rgd->rd_rstree);
527 BUG_ON(!rgd->rd_rs_cnt);
528 rgd->rd_rs_cnt--;
529
530 if (rs->rs_free) {
531 /* return reserved blocks to the rgrp and the ip */
532 BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free);
533 rs->rs_rgd->rd_reserved -= rs->rs_free;
534 rs->rs_free = 0;
535 clear_bit(GBF_FULL, &rs->rs_bi->bi_flags);
536 smp_mb__after_clear_bit();
537 }
538 /* We can't change any of the step 1 or step 2 components of the rs.
539 E.g. We can't set rs_rgd to NULL because the rgd glock is held and
540 dequeued through this pointer.
541 Can't: atomic_set(&rs->rs_sizehint, 0);
542 Can't: rs->rs_requested = 0;
543 Can't: rs->rs_rgd = NULL;*/
544 rs->rs_bi = NULL;
545 rs->rs_biblk = 0;
546}
547
548/**
549 * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree
550 * @rs: The reservation to remove
551 *
552 */
553void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
554{
555 struct gfs2_rgrpd *rgd;
556
557 if (!gfs2_rs_active(rs))
558 return;
559
560 rgd = rs->rs_rgd;
561 spin_lock(&rgd->rd_rsspin);
562 __rs_deltree(rs);
563 spin_unlock(&rgd->rd_rsspin);
564}
565
566/**
567 * gfs2_rs_delete - delete a multi-block reservation
568 * @ip: The inode for this reservation
569 *
570 */
571void gfs2_rs_delete(struct gfs2_inode *ip)
572{
573 down_write(&ip->i_rw_mutex);
574 if (ip->i_res) {
575 gfs2_rs_deltree(ip->i_res);
576 trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE);
577 BUG_ON(ip->i_res->rs_free);
578 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
579 ip->i_res = NULL;
580 }
581 up_write(&ip->i_rw_mutex);
582}
583
584/**
585 * return_all_reservations - return all reserved blocks back to the rgrp.
586 * @rgd: the rgrp that needs its space back
587 *
588 * We previously reserved a bunch of blocks for allocation. Now we need to
589 * give them back. This leave the reservation structures in tact, but removes
590 * all of their corresponding "no-fly zones".
591 */
592static void return_all_reservations(struct gfs2_rgrpd *rgd)
593{
594 struct rb_node *n;
595 struct gfs2_blkreserv *rs;
596
597 spin_lock(&rgd->rd_rsspin);
598 while ((n = rb_first(&rgd->rd_rstree))) {
599 rs = rb_entry(n, struct gfs2_blkreserv, rs_node);
600 __rs_deltree(rs);
601 }
602 spin_unlock(&rgd->rd_rsspin);
603}
604
420void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) 605void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
421{ 606{
422 struct rb_node *n; 607 struct rb_node *n;
@@ -439,6 +624,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
439 624
440 gfs2_free_clones(rgd); 625 gfs2_free_clones(rgd);
441 kfree(rgd->rd_bits); 626 kfree(rgd->rd_bits);
627 return_all_reservations(rgd);
442 kmem_cache_free(gfs2_rgrpd_cachep, rgd); 628 kmem_cache_free(gfs2_rgrpd_cachep, rgd);
443 } 629 }
444} 630}
@@ -616,6 +802,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
616 rgd->rd_data0 = be64_to_cpu(buf.ri_data0); 802 rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
617 rgd->rd_data = be32_to_cpu(buf.ri_data); 803 rgd->rd_data = be32_to_cpu(buf.ri_data);
618 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); 804 rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
805 spin_lock_init(&rgd->rd_rsspin);
619 806
620 error = compute_bitstructs(rgd); 807 error = compute_bitstructs(rgd);
621 if (error) 808 if (error)
@@ -627,6 +814,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
627 goto fail; 814 goto fail;
628 815
629 rgd->rd_gl->gl_object = rgd; 816 rgd->rd_gl->gl_object = rgd;
817 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb;
630 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 818 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
631 if (rgd->rd_data > sdp->sd_max_rg_data) 819 if (rgd->rd_data > sdp->sd_max_rg_data)
632 sdp->sd_max_rg_data = rgd->rd_data; 820 sdp->sd_max_rg_data = rgd->rd_data;
@@ -736,9 +924,65 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
736 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved)); 924 memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));
737} 925}
738 926
927static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd)
928{
929 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
930 struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data;
931
932 if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free ||
933 rgl->rl_dinodes != str->rg_dinodes ||
934 rgl->rl_igeneration != str->rg_igeneration)
935 return 0;
936 return 1;
937}
938
939static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf)
940{
941 const struct gfs2_rgrp *str = buf;
942
943 rgl->rl_magic = cpu_to_be32(GFS2_MAGIC);
944 rgl->rl_flags = str->rg_flags;
945 rgl->rl_free = str->rg_free;
946 rgl->rl_dinodes = str->rg_dinodes;
947 rgl->rl_igeneration = str->rg_igeneration;
948 rgl->__pad = 0UL;
949}
950
951static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change)
952{
953 struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
954 u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change;
955 rgl->rl_unlinked = cpu_to_be32(unlinked);
956}
957
958static u32 count_unlinked(struct gfs2_rgrpd *rgd)
959{
960 struct gfs2_bitmap *bi;
961 const u32 length = rgd->rd_length;
962 const u8 *buffer = NULL;
963 u32 i, goal, count = 0;
964
965 for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) {
966 goal = 0;
967 buffer = bi->bi_bh->b_data + bi->bi_offset;
968 WARN_ON(!buffer_uptodate(bi->bi_bh));
969 while (goal < bi->bi_len * GFS2_NBBY) {
970 goal = gfs2_bitfit(buffer, bi->bi_len, goal,
971 GFS2_BLKST_UNLINKED);
972 if (goal == BFITNOENT)
973 break;
974 count++;
975 goal++;
976 }
977 }
978
979 return count;
980}
981
982
739/** 983/**
740 * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps 984 * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
741 * @gh: The glock holder for the resource group 985 * @rgd: the struct gfs2_rgrpd describing the RG to read in
742 * 986 *
743 * Read in all of a Resource Group's header and bitmap blocks. 987 * Read in all of a Resource Group's header and bitmap blocks.
744 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. 988 * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
@@ -746,9 +990,8 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
746 * Returns: errno 990 * Returns: errno
747 */ 991 */
748 992
749int gfs2_rgrp_go_lock(struct gfs2_holder *gh) 993int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
750{ 994{
751 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
752 struct gfs2_sbd *sdp = rgd->rd_sbd; 995 struct gfs2_sbd *sdp = rgd->rd_sbd;
753 struct gfs2_glock *gl = rgd->rd_gl; 996 struct gfs2_glock *gl = rgd->rd_gl;
754 unsigned int length = rgd->rd_length; 997 unsigned int length = rgd->rd_length;
@@ -756,6 +999,9 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
756 unsigned int x, y; 999 unsigned int x, y;
757 int error; 1000 int error;
758 1001
1002 if (rgd->rd_bits[0].bi_bh != NULL)
1003 return 0;
1004
759 for (x = 0; x < length; x++) { 1005 for (x = 0; x < length; x++) {
760 bi = rgd->rd_bits + x; 1006 bi = rgd->rd_bits + x;
761 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh); 1007 error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
@@ -782,7 +1028,20 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
782 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); 1028 rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
783 rgd->rd_free_clone = rgd->rd_free; 1029 rgd->rd_free_clone = rgd->rd_free;
784 } 1030 }
785 1031 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) {
1032 rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd));
1033 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl,
1034 rgd->rd_bits[0].bi_bh->b_data);
1035 }
1036 else if (sdp->sd_args.ar_rgrplvb) {
1037 if (!gfs2_rgrp_lvb_valid(rgd)){
1038 gfs2_consist_rgrpd(rgd);
1039 error = -EIO;
1040 goto fail;
1041 }
1042 if (rgd->rd_rgl->rl_unlinked == 0)
1043 rgd->rd_flags &= ~GFS2_RDF_CHECK;
1044 }
786 return 0; 1045 return 0;
787 1046
788fail: 1047fail:
@@ -796,6 +1055,39 @@ fail:
796 return error; 1055 return error;
797} 1056}
798 1057
1058int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
1059{
1060 u32 rl_flags;
1061
1062 if (rgd->rd_flags & GFS2_RDF_UPTODATE)
1063 return 0;
1064
1065 if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic)
1066 return gfs2_rgrp_bh_get(rgd);
1067
1068 rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags);
1069 rl_flags &= ~GFS2_RDF_MASK;
1070 rgd->rd_flags &= GFS2_RDF_MASK;
1071 rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
1072 if (rgd->rd_rgl->rl_unlinked == 0)
1073 rgd->rd_flags &= ~GFS2_RDF_CHECK;
1074 rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
1075 rgd->rd_free_clone = rgd->rd_free;
1076 rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
1077 rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
1078 return 0;
1079}
1080
1081int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
1082{
1083 struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
1084 struct gfs2_sbd *sdp = rgd->rd_sbd;
1085
1086 if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb)
1087 return 0;
1088 return gfs2_rgrp_bh_get((struct gfs2_rgrpd *)gh->gh_gl->gl_object);
1089}
1090
799/** 1091/**
800 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() 1092 * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get()
801 * @gh: The glock holder for the resource group 1093 * @gh: The glock holder for the resource group
@@ -809,8 +1101,10 @@ void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
809 1101
810 for (x = 0; x < length; x++) { 1102 for (x = 0; x < length; x++) {
811 struct gfs2_bitmap *bi = rgd->rd_bits + x; 1103 struct gfs2_bitmap *bi = rgd->rd_bits + x;
812 brelse(bi->bi_bh); 1104 if (bi->bi_bh) {
813 bi->bi_bh = NULL; 1105 brelse(bi->bi_bh);
1106 bi->bi_bh = NULL;
1107 }
814 } 1108 }
815 1109
816} 1110}
@@ -954,6 +1248,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
954 rgd->rd_flags |= GFS2_RGF_TRIMMED; 1248 rgd->rd_flags |= GFS2_RGF_TRIMMED;
955 gfs2_trans_add_bh(rgd->rd_gl, bh, 1); 1249 gfs2_trans_add_bh(rgd->rd_gl, bh, 1);
956 gfs2_rgrp_out(rgd, bh->b_data); 1250 gfs2_rgrp_out(rgd, bh->b_data);
1251 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data);
957 gfs2_trans_end(sdp); 1252 gfs2_trans_end(sdp);
958 } 1253 }
959 } 1254 }
@@ -974,38 +1269,184 @@ out:
974} 1269}
975 1270
976/** 1271/**
977 * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode 1272 * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree
978 * @ip: the incore GFS2 inode structure 1273 * @bi: the bitmap with the blocks
1274 * @ip: the inode structure
1275 * @biblk: the 32-bit block number relative to the start of the bitmap
1276 * @amount: the number of blocks to reserve
979 * 1277 *
980 * Returns: the struct gfs2_qadata 1278 * Returns: NULL - reservation was already taken, so not inserted
1279 * pointer to the inserted reservation
981 */ 1280 */
1281static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi,
1282 struct gfs2_inode *ip, u32 biblk,
1283 int amount)
1284{
1285 struct rb_node **newn, *parent = NULL;
1286 int rc;
1287 struct gfs2_blkreserv *rs = ip->i_res;
1288 struct gfs2_rgrpd *rgd = rs->rs_rgd;
1289 u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0;
982 1290
983struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip) 1291 spin_lock(&rgd->rd_rsspin);
1292 newn = &rgd->rd_rstree.rb_node;
1293 BUG_ON(!ip->i_res);
1294 BUG_ON(gfs2_rs_active(rs));
1295 /* Figure out where to put new node */
1296 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1297 while (*newn) {
1298 struct gfs2_blkreserv *cur =
1299 rb_entry(*newn, struct gfs2_blkreserv, rs_node);
1300
1301 parent = *newn;
1302 rc = rs_cmp(fsblock, amount, cur);
1303 if (rc > 0)
1304 newn = &((*newn)->rb_right);
1305 else if (rc < 0)
1306 newn = &((*newn)->rb_left);
1307 else {
1308 spin_unlock(&rgd->rd_rsspin);
1309 return NULL; /* reservation already in use */
1310 }
1311 }
1312
1313 /* Do our reservation work */
1314 rs = ip->i_res;
1315 rs->rs_free = amount;
1316 rs->rs_biblk = biblk;
1317 rs->rs_bi = bi;
1318 rb_link_node(&rs->rs_node, parent, newn);
1319 rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
1320
1321 /* Do our inode accounting for the reservation */
1322 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1323
1324 /* Do our rgrp accounting for the reservation */
1325 rgd->rd_reserved += amount; /* blocks reserved */
1326 rgd->rd_rs_cnt++; /* number of in-tree reservations */
1327 spin_unlock(&rgd->rd_rsspin);
1328 trace_gfs2_rs(ip, rs, TRACE_RS_INSERT);
1329 return rs;
1330}
1331
1332/**
1333 * unclaimed_blocks - return number of blocks that aren't spoken for
1334 */
1335static u32 unclaimed_blocks(struct gfs2_rgrpd *rgd)
984{ 1336{
985 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1337 return rgd->rd_free_clone - rgd->rd_reserved;
986 int error;
987 BUG_ON(ip->i_qadata != NULL);
988 ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
989 error = gfs2_rindex_update(sdp);
990 if (error)
991 fs_warn(sdp, "rindex update returns %d\n", error);
992 return ip->i_qadata;
993} 1338}
994 1339
995/** 1340/**
996 * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode 1341 * rg_mblk_search - find a group of multiple free blocks
997 * @ip: the incore GFS2 inode structure 1342 * @rgd: the resource group descriptor
1343 * @rs: the block reservation
1344 * @ip: pointer to the inode for which we're reserving blocks
998 * 1345 *
999 * Returns: the struct gfs2_qadata 1346 * This is very similar to rgblk_search, except we're looking for whole
1347 * 64-bit words that represent a chunk of 32 free blocks. I'm only focusing
1348 * on aligned dwords for speed's sake.
1349 *
1350 * Returns: 0 if successful or BFITNOENT if there isn't enough free space
1000 */ 1351 */
1001 1352
1002static int gfs2_blkrsv_get(struct gfs2_inode *ip) 1353static int rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1003{ 1354{
1004 BUG_ON(ip->i_res != NULL); 1355 struct gfs2_bitmap *bi = rgd->rd_bits;
1005 ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); 1356 const u32 length = rgd->rd_length;
1006 if (!ip->i_res) 1357 u32 blk;
1007 return -ENOMEM; 1358 unsigned int buf, x, search_bytes;
1008 return 0; 1359 u8 *buffer = NULL;
1360 u8 *ptr, *end, *nonzero;
1361 u32 goal, rsv_bytes;
1362 struct gfs2_blkreserv *rs;
1363 u32 best_rs_bytes, unclaimed;
1364 int best_rs_blocks;
1365
1366 /* Find bitmap block that contains bits for goal block */
1367 if (rgrp_contains_block(rgd, ip->i_goal))
1368 goal = ip->i_goal - rgd->rd_data0;
1369 else
1370 goal = rgd->rd_last_alloc;
1371 for (buf = 0; buf < length; buf++) {
1372 bi = rgd->rd_bits + buf;
1373 /* Convert scope of "goal" from rgrp-wide to within
1374 found bit block */
1375 if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
1376 goal -= bi->bi_start * GFS2_NBBY;
1377 goto do_search;
1378 }
1379 }
1380 buf = 0;
1381 goal = 0;
1382
1383do_search:
1384 best_rs_blocks = max_t(int, atomic_read(&ip->i_res->rs_sizehint),
1385 (RGRP_RSRV_MINBLKS * rgd->rd_length));
1386 best_rs_bytes = (best_rs_blocks *
1387 (1 + (RSRV_CONTENTION_FACTOR * rgd->rd_rs_cnt))) /
1388 GFS2_NBBY; /* 1 + is for our not-yet-created reservation */
1389 best_rs_bytes = ALIGN(best_rs_bytes, sizeof(u64));
1390 unclaimed = unclaimed_blocks(rgd);
1391 if (best_rs_bytes * GFS2_NBBY > unclaimed)
1392 best_rs_bytes = unclaimed >> GFS2_BIT_SIZE;
1393
1394 for (x = 0; x <= length; x++) {
1395 bi = rgd->rd_bits + buf;
1396
1397 if (test_bit(GBF_FULL, &bi->bi_flags))
1398 goto skip;
1399
1400 WARN_ON(!buffer_uptodate(bi->bi_bh));
1401 if (bi->bi_clone)
1402 buffer = bi->bi_clone + bi->bi_offset;
1403 else
1404 buffer = bi->bi_bh->b_data + bi->bi_offset;
1405
1406 /* We have to keep the reservations aligned on u64 boundaries
1407 otherwise we could get situations where a byte can't be
1408 used because it's after a reservation, but a free bit still
1409 is within the reservation's area. */
1410 ptr = buffer + ALIGN(goal >> GFS2_BIT_SIZE, sizeof(u64));
1411 end = (buffer + bi->bi_len);
1412 while (ptr < end) {
1413 rsv_bytes = 0;
1414 if ((ptr + best_rs_bytes) <= end)
1415 search_bytes = best_rs_bytes;
1416 else
1417 search_bytes = end - ptr;
1418 BUG_ON(!search_bytes);
1419 nonzero = memchr_inv(ptr, 0, search_bytes);
1420 /* If the lot is all zeroes, reserve the whole size. If
1421 there's enough zeroes to satisfy the request, use
1422 what we can. If there's not enough, keep looking. */
1423 if (nonzero == NULL)
1424 rsv_bytes = search_bytes;
1425 else if ((nonzero - ptr) * GFS2_NBBY >=
1426 ip->i_res->rs_requested)
1427 rsv_bytes = (nonzero - ptr);
1428
1429 if (rsv_bytes) {
1430 blk = ((ptr - buffer) * GFS2_NBBY);
1431 BUG_ON(blk >= bi->bi_len * GFS2_NBBY);
1432 rs = rs_insert(bi, ip, blk,
1433 rsv_bytes * GFS2_NBBY);
1434 if (IS_ERR(rs))
1435 return PTR_ERR(rs);
1436 if (rs)
1437 return 0;
1438 }
1439 ptr += ALIGN(search_bytes, sizeof(u64));
1440 }
1441skip:
1442 /* Try next bitmap block (wrap back to rgrp header
1443 if at end) */
1444 buf++;
1445 buf %= length;
1446 goal = 0;
1447 }
1448
1449 return BFITNOENT;
1009} 1450}
1010 1451
1011/** 1452/**
@@ -1014,24 +1455,26 @@ static int gfs2_blkrsv_get(struct gfs2_inode *ip)
1014 * @ip: the inode 1455 * @ip: the inode
1015 * 1456 *
1016 * If there's room for the requested blocks to be allocated from the RG: 1457 * If there's room for the requested blocks to be allocated from the RG:
1458 * This will try to get a multi-block reservation first, and if that doesn't
1459 * fit, it will take what it can.
1017 * 1460 *
1018 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) 1461 * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
1019 */ 1462 */
1020 1463
1021static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) 1464static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1022{ 1465{
1023 const struct gfs2_blkreserv *rs = ip->i_res; 1466 struct gfs2_blkreserv *rs = ip->i_res;
1024 1467
1025 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 1468 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
1026 return 0; 1469 return 0;
1027 if (rgd->rd_free_clone >= rs->rs_requested) 1470 /* Look for a multi-block reservation. */
1471 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS &&
1472 rg_mblk_search(rgd, ip) != BFITNOENT)
1473 return 1;
1474 if (unclaimed_blocks(rgd) >= rs->rs_requested)
1028 return 1; 1475 return 1;
1029 return 0;
1030}
1031 1476
1032static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk) 1477 return 0;
1033{
1034 return (bi->bi_start * GFS2_NBBY) + blk;
1035} 1478}
1036 1479
1037/** 1480/**
@@ -1101,119 +1544,120 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1101} 1544}
1102 1545
1103/** 1546/**
1104 * get_local_rgrp - Choose and lock a rgrp for allocation 1547 * gfs2_inplace_reserve - Reserve space in the filesystem
1105 * @ip: the inode to reserve space for 1548 * @ip: the inode to reserve space for
1106 * @last_unlinked: the last unlinked block 1549 * @requested: the number of blocks to be reserved
1107 *
1108 * Try to acquire rgrp in way which avoids contending with others.
1109 * 1550 *
1110 * Returns: errno 1551 * Returns: errno
1111 */ 1552 */
1112 1553
1113static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked) 1554int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1114{ 1555{
1115 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1556 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1116 struct gfs2_rgrpd *rgd, *begin = NULL; 1557 struct gfs2_rgrpd *begin = NULL;
1117 struct gfs2_blkreserv *rs = ip->i_res; 1558 struct gfs2_blkreserv *rs = ip->i_res;
1118 int error, rg_locked, flags = LM_FLAG_TRY; 1559 int error = 0, rg_locked, flags = LM_FLAG_TRY;
1560 u64 last_unlinked = NO_BLOCK;
1119 int loops = 0; 1561 int loops = 0;
1120 1562
1121 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1563 if (sdp->sd_args.ar_rgrplvb)
1122 rgd = begin = ip->i_rgd; 1564 flags |= GL_SKIP;
1123 else 1565 rs->rs_requested = requested;
1124 rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1566 if (gfs2_assert_warn(sdp, requested)) {
1125 1567 error = -EINVAL;
1126 if (rgd == NULL) 1568 goto out;
1569 }
1570 if (gfs2_rs_active(rs)) {
1571 begin = rs->rs_rgd;
1572 flags = 0; /* Yoda: Do or do not. There is no try */
1573 } else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
1574 rs->rs_rgd = begin = ip->i_rgd;
1575 } else {
1576 rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1577 }
1578 if (rs->rs_rgd == NULL)
1127 return -EBADSLT; 1579 return -EBADSLT;
1128 1580
1129 while (loops < 3) { 1581 while (loops < 3) {
1130 rg_locked = 0; 1582 rg_locked = 0;
1131 1583
1132 if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { 1584 if (gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
1133 rg_locked = 1; 1585 rg_locked = 1;
1134 error = 0; 1586 error = 0;
1587 } else if (!loops && !gfs2_rs_active(rs) &&
1588 rs->rs_rgd->rd_rs_cnt > RGRP_RSRV_MAX_CONTENDERS) {
1589 /* If the rgrp already is maxed out for contenders,
1590 we can eliminate it as a "first pass" without even
1591 requesting the rgrp glock. */
1592 error = GLR_TRYFAILED;
1135 } else { 1593 } else {
1136 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1594 error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
1137 flags, &rs->rs_rgd_gh); 1595 LM_ST_EXCLUSIVE, flags,
1596 &rs->rs_rgd_gh);
1597 if (!error && sdp->sd_args.ar_rgrplvb) {
1598 error = update_rgrp_lvb(rs->rs_rgd);
1599 if (error) {
1600 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1601 return error;
1602 }
1603 }
1138 } 1604 }
1139 switch (error) { 1605 switch (error) {
1140 case 0: 1606 case 0:
1141 if (try_rgrp_fit(rgd, ip)) { 1607 if (gfs2_rs_active(rs)) {
1142 ip->i_rgd = rgd; 1608 if (unclaimed_blocks(rs->rs_rgd) +
1609 rs->rs_free >= rs->rs_requested) {
1610 ip->i_rgd = rs->rs_rgd;
1611 return 0;
1612 }
1613 /* We have a multi-block reservation, but the
1614 rgrp doesn't have enough free blocks to
1615 satisfy the request. Free the reservation
1616 and look for a suitable rgrp. */
1617 gfs2_rs_deltree(rs);
1618 }
1619 if (try_rgrp_fit(rs->rs_rgd, ip)) {
1620 if (sdp->sd_args.ar_rgrplvb)
1621 gfs2_rgrp_bh_get(rs->rs_rgd);
1622 ip->i_rgd = rs->rs_rgd;
1143 return 0; 1623 return 0;
1144 } 1624 }
1145 if (rgd->rd_flags & GFS2_RDF_CHECK) 1625 if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK) {
1146 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1626 if (sdp->sd_args.ar_rgrplvb)
1627 gfs2_rgrp_bh_get(rs->rs_rgd);
1628 try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
1629 ip->i_no_addr);
1630 }
1147 if (!rg_locked) 1631 if (!rg_locked)
1148 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1632 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1149 /* fall through */ 1633 /* fall through */
1150 case GLR_TRYFAILED: 1634 case GLR_TRYFAILED:
1151 rgd = gfs2_rgrpd_get_next(rgd); 1635 rs->rs_rgd = gfs2_rgrpd_get_next(rs->rs_rgd);
1152 if (rgd == begin) { 1636 rs->rs_rgd = rs->rs_rgd ? : begin; /* if NULL, wrap */
1153 flags = 0; 1637 if (rs->rs_rgd != begin) /* If we didn't wrap */
1154 loops++; 1638 break;
1155 } 1639
1640 flags &= ~LM_FLAG_TRY;
1641 loops++;
1642 /* Check that fs hasn't grown if writing to rindex */
1643 if (ip == GFS2_I(sdp->sd_rindex) &&
1644 !sdp->sd_rindex_uptodate) {
1645 error = gfs2_ri_update(ip);
1646 if (error)
1647 goto out;
1648 } else if (loops == 2)
1649 /* Flushing the log may release space */
1650 gfs2_log_flush(sdp, NULL);
1156 break; 1651 break;
1157 default: 1652 default:
1158 return error; 1653 goto out;
1159 } 1654 }
1160 } 1655 }
1161 1656 error = -ENOSPC;
1162 return -ENOSPC;
1163}
1164
1165static void gfs2_blkrsv_put(struct gfs2_inode *ip)
1166{
1167 BUG_ON(ip->i_res == NULL);
1168 kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
1169 ip->i_res = NULL;
1170}
1171
1172/**
1173 * gfs2_inplace_reserve - Reserve space in the filesystem
1174 * @ip: the inode to reserve space for
1175 * @requested: the number of blocks to be reserved
1176 *
1177 * Returns: errno
1178 */
1179
1180int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1181{
1182 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1183 struct gfs2_blkreserv *rs;
1184 int error;
1185 u64 last_unlinked = NO_BLOCK;
1186 int tries = 0;
1187
1188 error = gfs2_blkrsv_get(ip);
1189 if (error)
1190 return error;
1191
1192 rs = ip->i_res;
1193 rs->rs_requested = requested;
1194 if (gfs2_assert_warn(sdp, requested)) {
1195 error = -EINVAL;
1196 goto out;
1197 }
1198
1199 do {
1200 error = get_local_rgrp(ip, &last_unlinked);
1201 if (error != -ENOSPC)
1202 break;
1203 /* Check that fs hasn't grown if writing to rindex */
1204 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
1205 error = gfs2_ri_update(ip);
1206 if (error)
1207 break;
1208 continue;
1209 }
1210 /* Flushing the log may release space */
1211 gfs2_log_flush(sdp, NULL);
1212 } while (tries++ < 3);
1213 1657
1214out: 1658out:
1215 if (error) 1659 if (error)
1216 gfs2_blkrsv_put(ip); 1660 rs->rs_requested = 0;
1217 return error; 1661 return error;
1218} 1662}
1219 1663
@@ -1228,9 +1672,15 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1228{ 1672{
1229 struct gfs2_blkreserv *rs = ip->i_res; 1673 struct gfs2_blkreserv *rs = ip->i_res;
1230 1674
1675 if (!rs)
1676 return;
1677
1678 if (!rs->rs_free)
1679 gfs2_rs_deltree(rs);
1680
1231 if (rs->rs_rgd_gh.gh_gl) 1681 if (rs->rs_rgd_gh.gh_gl)
1232 gfs2_glock_dq_uninit(&rs->rs_rgd_gh); 1682 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1233 gfs2_blkrsv_put(ip); 1683 rs->rs_requested = 0;
1234} 1684}
1235 1685
1236/** 1686/**
@@ -1326,7 +1776,27 @@ do_search:
1326 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1776 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1327 buffer = bi->bi_clone + bi->bi_offset; 1777 buffer = bi->bi_clone + bi->bi_offset;
1328 1778
1329 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state); 1779 while (1) {
1780 struct gfs2_blkreserv *rs;
1781 u32 rgblk;
1782
1783 biblk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1784 if (biblk == BFITNOENT)
1785 break;
1786 /* Check if this block is reserved() */
1787 rgblk = gfs2_bi2rgd_blk(bi, biblk);
1788 rs = rs_find(rgd, rgblk);
1789 if (rs == NULL)
1790 break;
1791
1792 BUG_ON(rs->rs_bi != bi);
1793 biblk = BFITNOENT;
1794 /* This should jump to the first block after the
1795 reservation. */
1796 goal = rs->rs_biblk + rs->rs_free;
1797 if (goal >= bi->bi_len * GFS2_NBBY)
1798 break;
1799 }
1330 if (biblk != BFITNOENT) 1800 if (biblk != BFITNOENT)
1331 break; 1801 break;
1332 1802
@@ -1362,8 +1832,9 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1362 u32 blk, bool dinode, unsigned int *n) 1832 u32 blk, bool dinode, unsigned int *n)
1363{ 1833{
1364 const unsigned int elen = *n; 1834 const unsigned int elen = *n;
1365 u32 goal; 1835 u32 goal, rgblk;
1366 const u8 *buffer = NULL; 1836 const u8 *buffer = NULL;
1837 struct gfs2_blkreserv *rs;
1367 1838
1368 *n = 0; 1839 *n = 0;
1369 buffer = bi->bi_bh->b_data + bi->bi_offset; 1840 buffer = bi->bi_bh->b_data + bi->bi_offset;
@@ -1376,6 +1847,10 @@ static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1376 goal++; 1847 goal++;
1377 if (goal >= (bi->bi_len * GFS2_NBBY)) 1848 if (goal >= (bi->bi_len * GFS2_NBBY))
1378 break; 1849 break;
1850 rgblk = gfs2_bi2rgd_blk(bi, goal);
1851 rs = rs_find(rgd, rgblk);
1852 if (rs) /* Oops, we bumped into someone's reservation */
1853 break;
1379 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) != 1854 if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
1380 GFS2_BLKST_FREE) 1855 GFS2_BLKST_FREE)
1381 break; 1856 break;
@@ -1451,12 +1926,22 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
1451 1926
1452int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) 1927int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
1453{ 1928{
1454 const struct gfs2_rgrpd *rgd = gl->gl_object; 1929 struct gfs2_rgrpd *rgd = gl->gl_object;
1930 struct gfs2_blkreserv *trs;
1931 const struct rb_node *n;
1932
1455 if (rgd == NULL) 1933 if (rgd == NULL)
1456 return 0; 1934 return 0;
1457 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", 1935 gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u\n",
1458 (unsigned long long)rgd->rd_addr, rgd->rd_flags, 1936 (unsigned long long)rgd->rd_addr, rgd->rd_flags,
1459 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); 1937 rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
1938 rgd->rd_reserved);
1939 spin_lock(&rgd->rd_rsspin);
1940 for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
1941 trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
1942 dump_rs(seq, trs);
1943 }
1944 spin_unlock(&rgd->rd_rsspin);
1460 return 0; 1945 return 0;
1461} 1946}
1462 1947
@@ -1471,10 +1956,63 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1471} 1956}
1472 1957
1473/** 1958/**
1959 * claim_reserved_blks - Claim previously reserved blocks
1960 * @ip: the inode that's claiming the reservation
1961 * @dinode: 1 if this block is a dinode block, otherwise data block
1962 * @nblocks: desired extent length
1963 *
1964 * Lay claim to previously allocated block reservation blocks.
1965 * Returns: Starting block number of the blocks claimed.
1966 * Sets *nblocks to the actual extent length allocated.
1967 */
1968static u64 claim_reserved_blks(struct gfs2_inode *ip, bool dinode,
1969 unsigned int *nblocks)
1970{
1971 struct gfs2_blkreserv *rs = ip->i_res;
1972 struct gfs2_rgrpd *rgd = rs->rs_rgd;
1973 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1974 struct gfs2_bitmap *bi;
1975 u64 start_block = gfs2_rs_startblk(rs);
1976 const unsigned int elen = *nblocks;
1977
1978 /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/
1979 gfs2_assert_withdraw(sdp, rgd);
1980 /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/
1981 bi = rs->rs_bi;
1982 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1983
1984 for (*nblocks = 0; *nblocks < elen && rs->rs_free; (*nblocks)++) {
1985 /* Make sure the bitmap hasn't changed */
1986 gfs2_setbit(rgd, bi->bi_clone, bi, rs->rs_biblk,
1987 dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1988 rs->rs_biblk++;
1989 rs->rs_free--;
1990
1991 BUG_ON(!rgd->rd_reserved);
1992 rgd->rd_reserved--;
1993 dinode = false;
1994 trace_gfs2_rs(ip, rs, TRACE_RS_CLAIM);
1995 }
1996
1997 if (!rs->rs_free) {
1998 struct gfs2_rgrpd *rgd = ip->i_res->rs_rgd;
1999
2000 gfs2_rs_deltree(rs);
2001 /* -nblocks because we haven't returned to do the math yet.
2002 I'm doing the math backwards to prevent negative numbers,
2003 but think of it as:
2004 if (unclaimed_blocks(rgd) - *nblocks >= RGRP_RSRV_MINBLKS */
2005 if (unclaimed_blocks(rgd) >= RGRP_RSRV_MINBLKS + *nblocks)
2006 rg_mblk_search(rgd, ip);
2007 }
2008 return start_block;
2009}
2010
2011/**
1474 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode 2012 * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
1475 * @ip: the inode to allocate the block for 2013 * @ip: the inode to allocate the block for
1476 * @bn: Used to return the starting block number 2014 * @bn: Used to return the starting block number
1477 * @ndata: requested number of blocks/extent length (value/result) 2015 * @nblocks: requested number of blocks/extent length (value/result)
1478 * @dinode: 1 if we're allocating a dinode block, else 0 2016 * @dinode: 1 if we're allocating a dinode block, else 0
1479 * @generation: the generation number of the inode 2017 * @generation: the generation number of the inode
1480 * 2018 *
@@ -1496,23 +2034,37 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1496 /* Only happens if there is a bug in gfs2, return something distinctive 2034 /* Only happens if there is a bug in gfs2, return something distinctive
1497 * to ensure that it is noticed. 2035 * to ensure that it is noticed.
1498 */ 2036 */
1499 if (ip->i_res == NULL) 2037 if (ip->i_res->rs_requested == 0)
1500 return -ECANCELED; 2038 return -ECANCELED;
1501 2039
1502 rgd = ip->i_rgd; 2040 /* Check if we have a multi-block reservation, and if so, claim the
1503 2041 next free block from it. */
1504 if (!dinode && rgrp_contains_block(rgd, ip->i_goal)) 2042 if (gfs2_rs_active(ip->i_res)) {
1505 goal = ip->i_goal - rgd->rd_data0; 2043 BUG_ON(!ip->i_res->rs_free);
1506 else 2044 rgd = ip->i_res->rs_rgd;
1507 goal = rgd->rd_last_alloc; 2045 block = claim_reserved_blks(ip, dinode, nblocks);
1508 2046 } else {
1509 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi); 2047 rgd = ip->i_rgd;
1510 2048
1511 /* Since all blocks are reserved in advance, this shouldn't happen */ 2049 if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
1512 if (blk == BFITNOENT) 2050 goal = ip->i_goal - rgd->rd_data0;
1513 goto rgrp_error; 2051 else
2052 goal = rgd->rd_last_alloc;
2053
2054 blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);
2055
2056 /* Since all blocks are reserved in advance, this shouldn't
2057 happen */
2058 if (blk == BFITNOENT) {
2059 printk(KERN_WARNING "BFITNOENT, nblocks=%u\n",
2060 *nblocks);
2061 printk(KERN_WARNING "FULL=%d\n",
2062 test_bit(GBF_FULL, &rgd->rd_bits->bi_flags));
2063 goto rgrp_error;
2064 }
1514 2065
1515 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks); 2066 block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
2067 }
1516 ndata = *nblocks; 2068 ndata = *nblocks;
1517 if (dinode) 2069 if (dinode)
1518 ndata--; 2070 ndata--;
@@ -1529,8 +2081,10 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1529 brelse(dibh); 2081 brelse(dibh);
1530 } 2082 }
1531 } 2083 }
1532 if (rgd->rd_free < *nblocks) 2084 if (rgd->rd_free < *nblocks) {
2085 printk(KERN_WARNING "nblocks=%u\n", *nblocks);
1533 goto rgrp_error; 2086 goto rgrp_error;
2087 }
1534 2088
1535 rgd->rd_free -= *nblocks; 2089 rgd->rd_free -= *nblocks;
1536 if (dinode) { 2090 if (dinode) {
@@ -1542,6 +2096,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
1542 2096
1543 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2097 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1544 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2098 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2099 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
1545 2100
1546 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); 2101 gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
1547 if (dinode) 2102 if (dinode)
@@ -1588,6 +2143,7 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1588 rgd->rd_flags &= ~GFS2_RGF_TRIMMED; 2143 rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
1589 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2144 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1590 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2145 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2146 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
1591 2147
1592 /* Directories keep their data in the metadata address space */ 2148 /* Directories keep their data in the metadata address space */
1593 if (meta || ip->i_depth) 2149 if (meta || ip->i_depth)
@@ -1624,6 +2180,8 @@ void gfs2_unlink_di(struct inode *inode)
1624 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); 2180 trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
1625 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2181 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1626 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2182 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2183 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2184 update_rgrp_lvb_unlinked(rgd, 1);
1627} 2185}
1628 2186
1629static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) 2187static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
@@ -1643,6 +2201,8 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
1643 2201
1644 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 2202 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1645 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 2203 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
2204 gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data);
2205 update_rgrp_lvb_unlinked(rgd, -1);
1646 2206
1647 gfs2_statfs_change(sdp, 0, +1, -1); 2207 gfs2_statfs_change(sdp, 0, +1, -1);
1648} 2208}
@@ -1784,6 +2344,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
1784 for (x = 0; x < rlist->rl_rgrps; x++) 2344 for (x = 0; x < rlist->rl_rgrps; x++)
1785 gfs2_holder_uninit(&rlist->rl_ghs[x]); 2345 gfs2_holder_uninit(&rlist->rl_ghs[x]);
1786 kfree(rlist->rl_ghs); 2346 kfree(rlist->rl_ghs);
2347 rlist->rl_ghs = NULL;
1787 } 2348 }
1788} 2349}
1789 2350
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index b4b10f4de25f..ca6e26729b86 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -13,6 +13,14 @@
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/uaccess.h> 14#include <linux/uaccess.h>
15 15
16/* Since each block in the file system is represented by two bits in the
17 * bitmap, one 64-bit word in the bitmap will represent 32 blocks.
18 * By reserving 32 blocks at a time, we can optimize / shortcut how we search
19 * through the bitmaps by looking a word at a time.
20 */
21#define RGRP_RSRV_MINBYTES 8
22#define RGRP_RSRV_MINBLKS ((u32)(RGRP_RSRV_MINBYTES * GFS2_NBBY))
23
16struct gfs2_rgrpd; 24struct gfs2_rgrpd;
17struct gfs2_sbd; 25struct gfs2_sbd;
18struct gfs2_holder; 26struct gfs2_holder;
@@ -29,13 +37,7 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
29extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); 37extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
30extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); 38extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
31 39
32extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip); 40extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
33static inline void gfs2_qadata_put(struct gfs2_inode *ip)
34{
35 BUG_ON(ip->i_qadata == NULL);
36 kfree(ip->i_qadata);
37 ip->i_qadata = NULL;
38}
39 41
40extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested); 42extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
41extern void gfs2_inplace_release(struct gfs2_inode *ip); 43extern void gfs2_inplace_release(struct gfs2_inode *ip);
@@ -43,6 +45,9 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
43extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n, 45extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
44 bool dinode, u64 *generation); 46 bool dinode, u64 *generation);
45 47
48extern int gfs2_rs_alloc(struct gfs2_inode *ip);
49extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
50extern void gfs2_rs_delete(struct gfs2_inode *ip);
46extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); 51extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
47extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 52extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
48extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 53extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
@@ -68,4 +73,30 @@ extern int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
68 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed); 73 const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed);
69extern int gfs2_fitrim(struct file *filp, void __user *argp); 74extern int gfs2_fitrim(struct file *filp, void __user *argp);
70 75
76/* This is how to tell if a multi-block reservation is "inplace" reserved: */
77static inline int gfs2_mb_reserved(struct gfs2_inode *ip)
78{
79 if (ip->i_res && ip->i_res->rs_requested)
80 return 1;
81 return 0;
82}
83
84/* This is how to tell if a multi-block reservation is in the rgrp tree: */
85static inline int gfs2_rs_active(struct gfs2_blkreserv *rs)
86{
87 if (rs && rs->rs_bi)
88 return 1;
89 return 0;
90}
91
92static inline u32 gfs2_bi2rgd_blk(const struct gfs2_bitmap *bi, u32 blk)
93{
94 return (bi->bi_start * GFS2_NBBY) + blk;
95}
96
97static inline u64 gfs2_rs_startblk(const struct gfs2_blkreserv *rs)
98{
99 return gfs2_bi2rgd_blk(rs->rs_bi, rs->rs_biblk) + rs->rs_rgd->rd_data0;
100}
101
71#endif /* __RGRP_DOT_H__ */ 102#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 713e621c240b..fc3168f47a14 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -78,6 +78,8 @@ enum {
78 Opt_quota_quantum, 78 Opt_quota_quantum,
79 Opt_barrier, 79 Opt_barrier,
80 Opt_nobarrier, 80 Opt_nobarrier,
81 Opt_rgrplvb,
82 Opt_norgrplvb,
81 Opt_error, 83 Opt_error,
82}; 84};
83 85
@@ -115,6 +117,8 @@ static const match_table_t tokens = {
115 {Opt_quota_quantum, "quota_quantum=%d"}, 117 {Opt_quota_quantum, "quota_quantum=%d"},
116 {Opt_barrier, "barrier"}, 118 {Opt_barrier, "barrier"},
117 {Opt_nobarrier, "nobarrier"}, 119 {Opt_nobarrier, "nobarrier"},
120 {Opt_rgrplvb, "rgrplvb"},
121 {Opt_norgrplvb, "norgrplvb"},
118 {Opt_error, NULL} 122 {Opt_error, NULL}
119}; 123};
120 124
@@ -267,6 +271,12 @@ int gfs2_mount_args(struct gfs2_args *args, char *options)
267 case Opt_nobarrier: 271 case Opt_nobarrier:
268 args->ar_nobarrier = 1; 272 args->ar_nobarrier = 1;
269 break; 273 break;
274 case Opt_rgrplvb:
275 args->ar_rgrplvb = 1;
276 break;
277 case Opt_norgrplvb:
278 args->ar_rgrplvb = 0;
279 break;
270 case Opt_error: 280 case Opt_error:
271 default: 281 default:
272 printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); 282 printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o);
@@ -838,7 +848,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
838 int error; 848 int error;
839 849
840 flush_workqueue(gfs2_delete_workqueue); 850 flush_workqueue(gfs2_delete_workqueue);
841 gfs2_quota_sync(sdp->sd_vfs, 0, 1); 851 gfs2_quota_sync(sdp->sd_vfs, 0);
842 gfs2_statfs_sync(sdp->sd_vfs, 0); 852 gfs2_statfs_sync(sdp->sd_vfs, 0);
843 853
844 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, 854 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
@@ -952,6 +962,8 @@ restart:
952static int gfs2_sync_fs(struct super_block *sb, int wait) 962static int gfs2_sync_fs(struct super_block *sb, int wait)
953{ 963{
954 struct gfs2_sbd *sdp = sb->s_fs_info; 964 struct gfs2_sbd *sdp = sb->s_fs_info;
965
966 gfs2_quota_sync(sb, -1);
955 if (wait && sdp) 967 if (wait && sdp)
956 gfs2_log_flush(sdp, NULL); 968 gfs2_log_flush(sdp, NULL);
957 return 0; 969 return 0;
@@ -1379,6 +1391,8 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1379 seq_printf(s, ",nobarrier"); 1391 seq_printf(s, ",nobarrier");
1380 if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) 1392 if (test_bit(SDF_DEMOTE, &sdp->sd_flags))
1381 seq_printf(s, ",demote_interface_used"); 1393 seq_printf(s, ",demote_interface_used");
1394 if (args->ar_rgrplvb)
1395 seq_printf(s, ",rgrplvb");
1382 return 0; 1396 return 0;
1383} 1397}
1384 1398
@@ -1399,7 +1413,6 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
1399static int gfs2_dinode_dealloc(struct gfs2_inode *ip) 1413static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1400{ 1414{
1401 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1415 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1402 struct gfs2_qadata *qa;
1403 struct gfs2_rgrpd *rgd; 1416 struct gfs2_rgrpd *rgd;
1404 struct gfs2_holder gh; 1417 struct gfs2_holder gh;
1405 int error; 1418 int error;
@@ -1409,13 +1422,13 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1409 return -EIO; 1422 return -EIO;
1410 } 1423 }
1411 1424
1412 qa = gfs2_qadata_get(ip); 1425 error = gfs2_rindex_update(sdp);
1413 if (!qa) 1426 if (error)
1414 return -ENOMEM; 1427 return error;
1415 1428
1416 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1429 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1417 if (error) 1430 if (error)
1418 goto out; 1431 return error;
1419 1432
1420 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1); 1433 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
1421 if (!rgd) { 1434 if (!rgd) {
@@ -1443,8 +1456,6 @@ out_rg_gunlock:
1443 gfs2_glock_dq_uninit(&gh); 1456 gfs2_glock_dq_uninit(&gh);
1444out_qs: 1457out_qs:
1445 gfs2_quota_unhold(ip); 1458 gfs2_quota_unhold(ip);
1446out:
1447 gfs2_qadata_put(ip);
1448 return error; 1459 return error;
1449} 1460}
1450 1461
@@ -1545,6 +1556,9 @@ out_truncate:
1545 1556
1546out_unlock: 1557out_unlock:
1547 /* Error path for case 1 */ 1558 /* Error path for case 1 */
1559 if (gfs2_rs_active(ip->i_res))
1560 gfs2_rs_deltree(ip->i_res);
1561
1548 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) 1562 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
1549 gfs2_glock_dq(&ip->i_iopen_gh); 1563 gfs2_glock_dq(&ip->i_iopen_gh);
1550 gfs2_holder_uninit(&ip->i_iopen_gh); 1564 gfs2_holder_uninit(&ip->i_iopen_gh);
@@ -1554,6 +1568,7 @@ out_unlock:
1554out: 1568out:
1555 /* Case 3 starts here */ 1569 /* Case 3 starts here */
1556 truncate_inode_pages(&inode->i_data, 0); 1570 truncate_inode_pages(&inode->i_data, 0);
1571 gfs2_rs_delete(ip);
1557 clear_inode(inode); 1572 clear_inode(inode);
1558 gfs2_dir_hash_inval(ip); 1573 gfs2_dir_hash_inval(ip);
1559 ip->i_gl->gl_object = NULL; 1574 ip->i_gl->gl_object = NULL;
@@ -1576,6 +1591,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
1576 ip->i_flags = 0; 1591 ip->i_flags = 0;
1577 ip->i_gl = NULL; 1592 ip->i_gl = NULL;
1578 ip->i_rgd = NULL; 1593 ip->i_rgd = NULL;
1594 ip->i_res = NULL;
1579 } 1595 }
1580 return &ip->i_inode; 1596 return &ip->i_inode;
1581} 1597}
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 9c2592b1d5ff..8056b7b7238e 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -168,7 +168,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf,
168 if (simple_strtol(buf, NULL, 0) != 1) 168 if (simple_strtol(buf, NULL, 0) != 1)
169 return -EINVAL; 169 return -EINVAL;
170 170
171 gfs2_quota_sync(sdp->sd_vfs, 0, 1); 171 gfs2_quota_sync(sdp->sd_vfs, 0);
172 return len; 172 return len;
173} 173}
174 174
@@ -276,7 +276,15 @@ static struct attribute *gfs2_attrs[] = {
276 NULL, 276 NULL,
277}; 277};
278 278
279static void gfs2_sbd_release(struct kobject *kobj)
280{
281 struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj);
282
283 kfree(sdp);
284}
285
279static struct kobj_type gfs2_ktype = { 286static struct kobj_type gfs2_ktype = {
287 .release = gfs2_sbd_release,
280 .default_attrs = gfs2_attrs, 288 .default_attrs = gfs2_attrs,
281 .sysfs_ops = &gfs2_attr_ops, 289 .sysfs_ops = &gfs2_attr_ops,
282}; 290};
@@ -583,6 +591,7 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
583 char ro[20]; 591 char ro[20];
584 char spectator[20]; 592 char spectator[20];
585 char *envp[] = { ro, spectator, NULL }; 593 char *envp[] = { ro, spectator, NULL };
594 int sysfs_frees_sdp = 0;
586 595
587 sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0); 596 sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
588 sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0); 597 sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
@@ -591,8 +600,10 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
591 error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, 600 error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
592 "%s", sdp->sd_table_name); 601 "%s", sdp->sd_table_name);
593 if (error) 602 if (error)
594 goto fail; 603 goto fail_reg;
595 604
605 sysfs_frees_sdp = 1; /* Freeing sdp is now done by sysfs calling
606 function gfs2_sbd_release. */
596 error = sysfs_create_group(&sdp->sd_kobj, &tune_group); 607 error = sysfs_create_group(&sdp->sd_kobj, &tune_group);
597 if (error) 608 if (error)
598 goto fail_reg; 609 goto fail_reg;
@@ -615,9 +626,13 @@ fail_lock_module:
615fail_tune: 626fail_tune:
616 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 627 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
617fail_reg: 628fail_reg:
618 kobject_put(&sdp->sd_kobj); 629 free_percpu(sdp->sd_lkstats);
619fail:
620 fs_err(sdp, "error %d adding sysfs files", error); 630 fs_err(sdp, "error %d adding sysfs files", error);
631 if (sysfs_frees_sdp)
632 kobject_put(&sdp->sd_kobj);
633 else
634 kfree(sdp);
635 sb->s_fs_info = NULL;
621 return error; 636 return error;
622} 637}
623 638
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
index 1b8b81588199..a25c252fe412 100644
--- a/fs/gfs2/trace_gfs2.h
+++ b/fs/gfs2/trace_gfs2.h
@@ -14,6 +14,7 @@
14#include <linux/ktime.h> 14#include <linux/ktime.h>
15#include "incore.h" 15#include "incore.h"
16#include "glock.h" 16#include "glock.h"
17#include "rgrp.h"
17 18
18#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn } 19#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
19#define glock_trace_name(x) __print_symbolic(x, \ 20#define glock_trace_name(x) __print_symbolic(x, \
@@ -31,6 +32,17 @@
31 { GFS2_BLKST_DINODE, "dinode" }, \ 32 { GFS2_BLKST_DINODE, "dinode" }, \
32 { GFS2_BLKST_UNLINKED, "unlinked" }) 33 { GFS2_BLKST_UNLINKED, "unlinked" })
33 34
35#define TRACE_RS_DELETE 0
36#define TRACE_RS_TREEDEL 1
37#define TRACE_RS_INSERT 2
38#define TRACE_RS_CLAIM 3
39
40#define rs_func_name(x) __print_symbolic(x, \
41 { 0, "del " }, \
42 { 1, "tdel" }, \
43 { 2, "ins " }, \
44 { 3, "clm " })
45
34#define show_glock_flags(flags) __print_flags(flags, "", \ 46#define show_glock_flags(flags) __print_flags(flags, "", \
35 {(1UL << GLF_LOCK), "l" }, \ 47 {(1UL << GLF_LOCK), "l" }, \
36 {(1UL << GLF_DEMOTE), "D" }, \ 48 {(1UL << GLF_DEMOTE), "D" }, \
@@ -470,6 +482,7 @@ TRACE_EVENT(gfs2_block_alloc,
470 __field( u8, block_state ) 482 __field( u8, block_state )
471 __field( u64, rd_addr ) 483 __field( u64, rd_addr )
472 __field( u32, rd_free_clone ) 484 __field( u32, rd_free_clone )
485 __field( u32, rd_reserved )
473 ), 486 ),
474 487
475 TP_fast_assign( 488 TP_fast_assign(
@@ -480,16 +493,58 @@ TRACE_EVENT(gfs2_block_alloc,
480 __entry->block_state = block_state; 493 __entry->block_state = block_state;
481 __entry->rd_addr = rgd->rd_addr; 494 __entry->rd_addr = rgd->rd_addr;
482 __entry->rd_free_clone = rgd->rd_free_clone; 495 __entry->rd_free_clone = rgd->rd_free_clone;
496 __entry->rd_reserved = rgd->rd_reserved;
483 ), 497 ),
484 498
485 TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u", 499 TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
486 MAJOR(__entry->dev), MINOR(__entry->dev), 500 MAJOR(__entry->dev), MINOR(__entry->dev),
487 (unsigned long long)__entry->inum, 501 (unsigned long long)__entry->inum,
488 (unsigned long long)__entry->start, 502 (unsigned long long)__entry->start,
489 (unsigned long)__entry->len, 503 (unsigned long)__entry->len,
490 block_state_name(__entry->block_state), 504 block_state_name(__entry->block_state),
491 (unsigned long long)__entry->rd_addr, 505 (unsigned long long)__entry->rd_addr,
492 __entry->rd_free_clone) 506 __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
507);
508
509/* Keep track of multi-block reservations as they are allocated/freed */
510TRACE_EVENT(gfs2_rs,
511
512 TP_PROTO(const struct gfs2_inode *ip, const struct gfs2_blkreserv *rs,
513 u8 func),
514
515 TP_ARGS(ip, rs, func),
516
517 TP_STRUCT__entry(
518 __field( dev_t, dev )
519 __field( u64, rd_addr )
520 __field( u32, rd_free_clone )
521 __field( u32, rd_reserved )
522 __field( u64, inum )
523 __field( u64, start )
524 __field( u32, free )
525 __field( u8, func )
526 ),
527
528 TP_fast_assign(
529 __entry->dev = rs->rs_rgd ? rs->rs_rgd->rd_sbd->sd_vfs->s_dev : 0;
530 __entry->rd_addr = rs->rs_rgd ? rs->rs_rgd->rd_addr : 0;
531 __entry->rd_free_clone = rs->rs_rgd ? rs->rs_rgd->rd_free_clone : 0;
532 __entry->rd_reserved = rs->rs_rgd ? rs->rs_rgd->rd_reserved : 0;
533 __entry->inum = ip ? ip->i_no_addr : 0;
534 __entry->start = gfs2_rs_startblk(rs);
535 __entry->free = rs->rs_free;
536 __entry->func = func;
537 ),
538
539 TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s "
540 "f:%lu",
541 MAJOR(__entry->dev), MINOR(__entry->dev),
542 (unsigned long long)__entry->inum,
543 (unsigned long long)__entry->start,
544 (unsigned long long)__entry->rd_addr,
545 (unsigned long)__entry->rd_free_clone,
546 (unsigned long)__entry->rd_reserved,
547 rs_func_name(__entry->func), (unsigned long)__entry->free)
493); 548);
494 549
495#endif /* _TRACE_GFS2_H */ 550#endif /* _TRACE_GFS2_H */
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index 125d4572e1c0..41f42cdccbb8 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -31,7 +31,7 @@ struct gfs2_glock;
31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) 31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
32{ 32{
33 const struct gfs2_blkreserv *rs = ip->i_res; 33 const struct gfs2_blkreserv *rs = ip->i_res;
34 if (rs->rs_requested < ip->i_rgd->rd_length) 34 if (rs && rs->rs_requested < ip->i_rgd->rd_length)
35 return rs->rs_requested + 1; 35 return rs->rs_requested + 1;
36 return ip->i_rgd->rd_length; 36 return ip->i_rgd->rd_length;
37} 37}
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 3586b0dd6aa7..80535739ac7b 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -79,23 +79,19 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
79 const char *type, const char *function, 79 const char *type, const char *function,
80 char *file, unsigned int line); 80 char *file, unsigned int line);
81 81
82static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, 82static inline int gfs2_meta_check(struct gfs2_sbd *sdp,
83 struct buffer_head *bh, 83 struct buffer_head *bh)
84 const char *function,
85 char *file, unsigned int line)
86{ 84{
87 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; 85 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data;
88 u32 magic = be32_to_cpu(mh->mh_magic); 86 u32 magic = be32_to_cpu(mh->mh_magic);
89 if (unlikely(magic != GFS2_MAGIC)) 87 if (unlikely(magic != GFS2_MAGIC)) {
90 return gfs2_meta_check_ii(sdp, bh, "magic number", function, 88 printk(KERN_ERR "GFS2: Magic number missing at %llu\n",
91 file, line); 89 (unsigned long long)bh->b_blocknr);
90 return -EIO;
91 }
92 return 0; 92 return 0;
93} 93}
94 94
95#define gfs2_meta_check(sdp, bh) \
96gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
97
98
99int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, 95int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
100 u16 type, u16 t, 96 u16 type, u16 t,
101 const char *function, 97 const char *function,
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 927f4df874ae..27a0b4a901f5 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -325,12 +325,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
325 struct gfs2_ea_header *ea, 325 struct gfs2_ea_header *ea,
326 struct gfs2_ea_header *prev, int leave) 326 struct gfs2_ea_header *prev, int leave)
327{ 327{
328 struct gfs2_qadata *qa;
329 int error; 328 int error;
330 329
331 qa = gfs2_qadata_get(ip); 330 error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
332 if (!qa) 331 if (error)
333 return -ENOMEM; 332 return error;
334 333
335 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 334 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
336 if (error) 335 if (error)
@@ -340,7 +339,6 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
340 339
341 gfs2_quota_unhold(ip); 340 gfs2_quota_unhold(ip);
342out_alloc: 341out_alloc:
343 gfs2_qadata_put(ip);
344 return error; 342 return error;
345} 343}
346 344
@@ -713,17 +711,16 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
713 unsigned int blks, 711 unsigned int blks,
714 ea_skeleton_call_t skeleton_call, void *private) 712 ea_skeleton_call_t skeleton_call, void *private)
715{ 713{
716 struct gfs2_qadata *qa;
717 struct buffer_head *dibh; 714 struct buffer_head *dibh;
718 int error; 715 int error;
719 716
720 qa = gfs2_qadata_get(ip); 717 error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
721 if (!qa) 718 if (error)
722 return -ENOMEM; 719 return error;
723 720
724 error = gfs2_quota_lock_check(ip); 721 error = gfs2_quota_lock_check(ip);
725 if (error) 722 if (error)
726 goto out; 723 return error;
727 724
728 error = gfs2_inplace_reserve(ip, blks); 725 error = gfs2_inplace_reserve(ip, blks);
729 if (error) 726 if (error)
@@ -753,8 +750,6 @@ out_ipres:
753 gfs2_inplace_release(ip); 750 gfs2_inplace_release(ip);
754out_gunlock_q: 751out_gunlock_q:
755 gfs2_quota_unlock(ip); 752 gfs2_quota_unlock(ip);
756out:
757 gfs2_qadata_put(ip);
758 return error; 753 return error;
759} 754}
760 755
@@ -1494,16 +1489,15 @@ out_gunlock:
1494 1489
1495int gfs2_ea_dealloc(struct gfs2_inode *ip) 1490int gfs2_ea_dealloc(struct gfs2_inode *ip)
1496{ 1491{
1497 struct gfs2_qadata *qa;
1498 int error; 1492 int error;
1499 1493
1500 qa = gfs2_qadata_get(ip); 1494 error = gfs2_rindex_update(GFS2_SB(&ip->i_inode));
1501 if (!qa) 1495 if (error)
1502 return -ENOMEM; 1496 return error;
1503 1497
1504 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1498 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1505 if (error) 1499 if (error)
1506 goto out_alloc; 1500 return error;
1507 1501
1508 error = ea_foreach(ip, ea_dealloc_unstuffed, NULL); 1502 error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
1509 if (error) 1503 if (error)
@@ -1519,8 +1513,6 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
1519 1513
1520out_quota: 1514out_quota:
1521 gfs2_quota_unhold(ip); 1515 gfs2_quota_unhold(ip);
1522out_alloc:
1523 gfs2_qadata_put(ip);
1524 return error; 1516 return error;
1525} 1517}
1526 1518
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 62fc14ea4b73..422dde2ec0a1 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -18,7 +18,7 @@
18 * hfs_lookup() 18 * hfs_lookup()
19 */ 19 */
20static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, 20static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry,
21 struct nameidata *nd) 21 unsigned int flags)
22{ 22{
23 hfs_cat_rec rec; 23 hfs_cat_rec rec;
24 struct hfs_find_data fd; 24 struct hfs_find_data fd;
@@ -187,7 +187,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
187 * the directory and the name (and its length) of the new file. 187 * the directory and the name (and its length) of the new file.
188 */ 188 */
189static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 189static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
190 struct nameidata *nd) 190 bool excl)
191{ 191{
192 struct inode *inode; 192 struct inode *inode;
193 int res; 193 int res;
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index 2c16316d2917..a67955a0c36f 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -432,7 +432,7 @@ out:
432 if (inode->i_ino < HFS_FIRSTUSER_CNID) 432 if (inode->i_ino < HFS_FIRSTUSER_CNID)
433 set_bit(HFS_FLG_ALT_MDB_DIRTY, &HFS_SB(sb)->flags); 433 set_bit(HFS_FLG_ALT_MDB_DIRTY, &HFS_SB(sb)->flags);
434 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); 434 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
435 sb->s_dirt = 1; 435 hfs_mark_mdb_dirty(sb);
436 } 436 }
437 return res; 437 return res;
438 438
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 1bf967c6bfdc..8275175acf6e 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -14,6 +14,7 @@
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/workqueue.h>
17 18
18#include <asm/byteorder.h> 19#include <asm/byteorder.h>
19#include <asm/uaccess.h> 20#include <asm/uaccess.h>
@@ -137,16 +138,15 @@ struct hfs_sb_info {
137 gid_t s_gid; /* The gid of all files */ 138 gid_t s_gid; /* The gid of all files */
138 139
139 int session, part; 140 int session, part;
140
141 struct nls_table *nls_io, *nls_disk; 141 struct nls_table *nls_io, *nls_disk;
142
143 struct mutex bitmap_lock; 142 struct mutex bitmap_lock;
144
145 unsigned long flags; 143 unsigned long flags;
146
147 u16 blockoffset; 144 u16 blockoffset;
148
149 int fs_div; 145 int fs_div;
146 struct super_block *sb;
147 int work_queued; /* non-zero delayed work is queued */
148 struct delayed_work mdb_work; /* MDB flush delayed work */
149 spinlock_t work_lock; /* protects mdb_work and work_queued */
150}; 150};
151 151
152#define HFS_FLG_BITMAP_DIRTY 0 152#define HFS_FLG_BITMAP_DIRTY 0
@@ -226,6 +226,9 @@ extern int hfs_compare_dentry(const struct dentry *parent,
226extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); 226extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *);
227extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *); 227extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *);
228 228
229/* super.c */
230extern void hfs_mark_mdb_dirty(struct super_block *sb);
231
229extern struct timezone sys_tz; 232extern struct timezone sys_tz;
230 233
231/* 234/*
@@ -253,7 +256,7 @@ static inline const char *hfs_mdb_name(struct super_block *sb)
253static inline void hfs_bitmap_dirty(struct super_block *sb) 256static inline void hfs_bitmap_dirty(struct super_block *sb)
254{ 257{
255 set_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags); 258 set_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags);
256 sb->s_dirt = 1; 259 hfs_mark_mdb_dirty(sb);
257} 260}
258 261
259#define sb_bread512(sb, sec, data) ({ \ 262#define sb_bread512(sb, sec, data) ({ \
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 761ec06354b4..ee1bc55677f1 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -220,7 +220,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode)
220 insert_inode_hash(inode); 220 insert_inode_hash(inode);
221 mark_inode_dirty(inode); 221 mark_inode_dirty(inode);
222 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); 222 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
223 sb->s_dirt = 1; 223 hfs_mark_mdb_dirty(sb);
224 224
225 return inode; 225 return inode;
226} 226}
@@ -235,7 +235,7 @@ void hfs_delete_inode(struct inode *inode)
235 if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) 235 if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID))
236 HFS_SB(sb)->root_dirs--; 236 HFS_SB(sb)->root_dirs--;
237 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); 237 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
238 sb->s_dirt = 1; 238 hfs_mark_mdb_dirty(sb);
239 return; 239 return;
240 } 240 }
241 HFS_SB(sb)->file_count--; 241 HFS_SB(sb)->file_count--;
@@ -248,7 +248,7 @@ void hfs_delete_inode(struct inode *inode)
248 } 248 }
249 } 249 }
250 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); 250 set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags);
251 sb->s_dirt = 1; 251 hfs_mark_mdb_dirty(sb);
252} 252}
253 253
254void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, 254void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
@@ -489,7 +489,7 @@ out:
489} 489}
490 490
491static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, 491static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
492 struct nameidata *nd) 492 unsigned int flags)
493{ 493{
494 struct inode *inode = NULL; 494 struct inode *inode = NULL;
495 hfs_cat_rec rec; 495 hfs_cat_rec rec;
@@ -644,13 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
644 644
645 /* sync the superblock to buffers */ 645 /* sync the superblock to buffers */
646 sb = inode->i_sb; 646 sb = inode->i_sb;
647 if (sb->s_dirt) { 647 flush_delayed_work_sync(&HFS_SB(sb)->mdb_work);
648 lock_super(sb);
649 sb->s_dirt = 0;
650 if (!(sb->s_flags & MS_RDONLY))
651 hfs_mdb_commit(sb);
652 unlock_super(sb);
653 }
654 /* .. finally sync the buffers to disk */ 648 /* .. finally sync the buffers to disk */
655 err = sync_blockdev(sb->s_bdev); 649 err = sync_blockdev(sb->s_bdev);
656 if (!ret) 650 if (!ret)
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 1563d5ce5764..5fd51a5833ff 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -260,6 +260,10 @@ void hfs_mdb_commit(struct super_block *sb)
260{ 260{
261 struct hfs_mdb *mdb = HFS_SB(sb)->mdb; 261 struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
262 262
263 if (sb->s_flags & MS_RDONLY)
264 return;
265
266 lock_buffer(HFS_SB(sb)->mdb_bh);
263 if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { 267 if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) {
264 /* These parameters may have been modified, so write them back */ 268 /* These parameters may have been modified, so write them back */
265 mdb->drLsMod = hfs_mtime(); 269 mdb->drLsMod = hfs_mtime();
@@ -283,9 +287,13 @@ void hfs_mdb_commit(struct super_block *sb)
283 &mdb->drXTFlSize, NULL); 287 &mdb->drXTFlSize, NULL);
284 hfs_inode_write_fork(HFS_SB(sb)->cat_tree->inode, mdb->drCTExtRec, 288 hfs_inode_write_fork(HFS_SB(sb)->cat_tree->inode, mdb->drCTExtRec,
285 &mdb->drCTFlSize, NULL); 289 &mdb->drCTFlSize, NULL);
290
291 lock_buffer(HFS_SB(sb)->alt_mdb_bh);
286 memcpy(HFS_SB(sb)->alt_mdb, HFS_SB(sb)->mdb, HFS_SECTOR_SIZE); 292 memcpy(HFS_SB(sb)->alt_mdb, HFS_SB(sb)->mdb, HFS_SECTOR_SIZE);
287 HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); 293 HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
288 HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); 294 HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
295 unlock_buffer(HFS_SB(sb)->alt_mdb_bh);
296
289 mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); 297 mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
290 sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh); 298 sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
291 } 299 }
@@ -308,7 +316,11 @@ void hfs_mdb_commit(struct super_block *sb)
308 break; 316 break;
309 } 317 }
310 len = min((int)sb->s_blocksize - off, size); 318 len = min((int)sb->s_blocksize - off, size);
319
320 lock_buffer(bh);
311 memcpy(bh->b_data + off, ptr, len); 321 memcpy(bh->b_data + off, ptr, len);
322 unlock_buffer(bh);
323
312 mark_buffer_dirty(bh); 324 mark_buffer_dirty(bh);
313 brelse(bh); 325 brelse(bh);
314 block++; 326 block++;
@@ -317,6 +329,7 @@ void hfs_mdb_commit(struct super_block *sb)
317 size -= len; 329 size -= len;
318 } 330 }
319 } 331 }
332 unlock_buffer(HFS_SB(sb)->mdb_bh);
320} 333}
321 334
322void hfs_mdb_close(struct super_block *sb) 335void hfs_mdb_close(struct super_block *sb)
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 7b4c537d6e13..4eb873e0c07b 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -29,43 +29,9 @@ static struct kmem_cache *hfs_inode_cachep;
29 29
30MODULE_LICENSE("GPL"); 30MODULE_LICENSE("GPL");
31 31
32/*
33 * hfs_write_super()
34 *
35 * Description:
36 * This function is called by the VFS only. When the filesystem
37 * is mounted r/w it updates the MDB on disk.
38 * Input Variable(s):
39 * struct super_block *sb: Pointer to the hfs superblock
40 * Output Variable(s):
41 * NONE
42 * Returns:
43 * void
44 * Preconditions:
45 * 'sb' points to a "valid" (struct super_block).
46 * Postconditions:
47 * The MDB is marked 'unsuccessfully unmounted' by clearing bit 8 of drAtrb
48 * (hfs_put_super() must set this flag!). Some MDB fields are updated
49 * and the MDB buffer is written to disk by calling hfs_mdb_commit().
50 */
51static void hfs_write_super(struct super_block *sb)
52{
53 lock_super(sb);
54 sb->s_dirt = 0;
55
56 /* sync everything to the buffers */
57 if (!(sb->s_flags & MS_RDONLY))
58 hfs_mdb_commit(sb);
59 unlock_super(sb);
60}
61
62static int hfs_sync_fs(struct super_block *sb, int wait) 32static int hfs_sync_fs(struct super_block *sb, int wait)
63{ 33{
64 lock_super(sb);
65 hfs_mdb_commit(sb); 34 hfs_mdb_commit(sb);
66 sb->s_dirt = 0;
67 unlock_super(sb);
68
69 return 0; 35 return 0;
70} 36}
71 37
@@ -78,13 +44,44 @@ static int hfs_sync_fs(struct super_block *sb, int wait)
78 */ 44 */
79static void hfs_put_super(struct super_block *sb) 45static void hfs_put_super(struct super_block *sb)
80{ 46{
81 if (sb->s_dirt) 47 cancel_delayed_work_sync(&HFS_SB(sb)->mdb_work);
82 hfs_write_super(sb);
83 hfs_mdb_close(sb); 48 hfs_mdb_close(sb);
84 /* release the MDB's resources */ 49 /* release the MDB's resources */
85 hfs_mdb_put(sb); 50 hfs_mdb_put(sb);
86} 51}
87 52
53static void flush_mdb(struct work_struct *work)
54{
55 struct hfs_sb_info *sbi;
56 struct super_block *sb;
57
58 sbi = container_of(work, struct hfs_sb_info, mdb_work.work);
59 sb = sbi->sb;
60
61 spin_lock(&sbi->work_lock);
62 sbi->work_queued = 0;
63 spin_unlock(&sbi->work_lock);
64
65 hfs_mdb_commit(sb);
66}
67
68void hfs_mark_mdb_dirty(struct super_block *sb)
69{
70 struct hfs_sb_info *sbi = HFS_SB(sb);
71 unsigned long delay;
72
73 if (sb->s_flags & MS_RDONLY)
74 return;
75
76 spin_lock(&sbi->work_lock);
77 if (!sbi->work_queued) {
78 delay = msecs_to_jiffies(dirty_writeback_interval * 10);
79 queue_delayed_work(system_long_wq, &sbi->mdb_work, delay);
80 sbi->work_queued = 1;
81 }
82 spin_unlock(&sbi->work_lock);
83}
84
88/* 85/*
89 * hfs_statfs() 86 * hfs_statfs()
90 * 87 *
@@ -184,7 +181,6 @@ static const struct super_operations hfs_super_operations = {
184 .write_inode = hfs_write_inode, 181 .write_inode = hfs_write_inode,
185 .evict_inode = hfs_evict_inode, 182 .evict_inode = hfs_evict_inode,
186 .put_super = hfs_put_super, 183 .put_super = hfs_put_super,
187 .write_super = hfs_write_super,
188 .sync_fs = hfs_sync_fs, 184 .sync_fs = hfs_sync_fs,
189 .statfs = hfs_statfs, 185 .statfs = hfs_statfs,
190 .remount_fs = hfs_remount, 186 .remount_fs = hfs_remount,
@@ -387,7 +383,10 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
387 if (!sbi) 383 if (!sbi)
388 return -ENOMEM; 384 return -ENOMEM;
389 385
386 sbi->sb = sb;
390 sb->s_fs_info = sbi; 387 sb->s_fs_info = sbi;
388 spin_lock_init(&sbi->work_lock);
389 INIT_DELAYED_WORK(&sbi->mdb_work, flush_mdb);
391 390
392 res = -EINVAL; 391 res = -EINVAL;
393 if (!parse_options((char *)data, sbi)) { 392 if (!parse_options((char *)data, sbi)) {
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 19cf291eb91f..91b91fd3a901 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -13,12 +13,12 @@
13 13
14/* dentry case-handling: just lowercase everything */ 14/* dentry case-handling: just lowercase everything */
15 15
16static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) 16static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
17{ 17{
18 struct inode *inode; 18 struct inode *inode;
19 int diff; 19 int diff;
20 20
21 if (nd->flags & LOOKUP_RCU) 21 if (flags & LOOKUP_RCU)
22 return -ECHILD; 22 return -ECHILD;
23 23
24 inode = dentry->d_inode; 24 inode = dentry->d_inode;
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index 1cad80c789cb..4cfbe2edd296 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -153,7 +153,7 @@ done:
153 kunmap(page); 153 kunmap(page);
154 *max = offset + (curr - pptr) * 32 + i - start; 154 *max = offset + (curr - pptr) * 32 + i - start;
155 sbi->free_blocks -= *max; 155 sbi->free_blocks -= *max;
156 sb->s_dirt = 1; 156 hfsplus_mark_mdb_dirty(sb);
157 dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); 157 dprint(DBG_BITMAP, "-> %u,%u\n", start, *max);
158out: 158out:
159 mutex_unlock(&sbi->alloc_mutex); 159 mutex_unlock(&sbi->alloc_mutex);
@@ -228,7 +228,7 @@ out:
228 set_page_dirty(page); 228 set_page_dirty(page);
229 kunmap(page); 229 kunmap(page);
230 sbi->free_blocks += len; 230 sbi->free_blocks += len;
231 sb->s_dirt = 1; 231 hfsplus_mark_mdb_dirty(sb);
232 mutex_unlock(&sbi->alloc_mutex); 232 mutex_unlock(&sbi->alloc_mutex);
233 233
234 return 0; 234 return 0;
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 26b53fb09f68..6b9f921ef2fa 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -25,7 +25,7 @@ static inline void hfsplus_instantiate(struct dentry *dentry,
25 25
26/* Find the entry inside dir named dentry->d_name */ 26/* Find the entry inside dir named dentry->d_name */
27static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, 27static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
28 struct nameidata *nd) 28 unsigned int flags)
29{ 29{
30 struct inode *inode = NULL; 30 struct inode *inode = NULL;
31 struct hfs_find_data fd; 31 struct hfs_find_data fd;
@@ -316,7 +316,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
316 inode->i_ctime = CURRENT_TIME_SEC; 316 inode->i_ctime = CURRENT_TIME_SEC;
317 mark_inode_dirty(inode); 317 mark_inode_dirty(inode);
318 sbi->file_count++; 318 sbi->file_count++;
319 dst_dir->i_sb->s_dirt = 1; 319 hfsplus_mark_mdb_dirty(dst_dir->i_sb);
320out: 320out:
321 mutex_unlock(&sbi->vh_mutex); 321 mutex_unlock(&sbi->vh_mutex);
322 return res; 322 return res;
@@ -465,7 +465,7 @@ out:
465} 465}
466 466
467static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode, 467static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
468 struct nameidata *nd) 468 bool excl)
469{ 469{
470 return hfsplus_mknod(dir, dentry, mode, 0); 470 return hfsplus_mknod(dir, dentry, mode, 0);
471} 471}
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 4e75ac646fea..558dbb463a4e 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -153,8 +153,11 @@ struct hfsplus_sb_info {
153 gid_t gid; 153 gid_t gid;
154 154
155 int part, session; 155 int part, session;
156
157 unsigned long flags; 156 unsigned long flags;
157
158 int work_queued; /* non-zero delayed work is queued */
159 struct delayed_work sync_work; /* FS sync delayed work */
160 spinlock_t work_lock; /* protects sync_work and work_queued */
158}; 161};
159 162
160#define HFSPLUS_SB_WRITEBACKUP 0 163#define HFSPLUS_SB_WRITEBACKUP 0
@@ -428,7 +431,7 @@ int hfsplus_show_options(struct seq_file *, struct dentry *);
428 431
429/* super.c */ 432/* super.c */
430struct inode *hfsplus_iget(struct super_block *, unsigned long); 433struct inode *hfsplus_iget(struct super_block *, unsigned long);
431int hfsplus_sync_fs(struct super_block *sb, int wait); 434void hfsplus_mark_mdb_dirty(struct super_block *sb);
432 435
433/* tables.c */ 436/* tables.c */
434extern u16 hfsplus_case_fold_table[]; 437extern u16 hfsplus_case_fold_table[];
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 82b69ee4dacc..3d8b4a675ba0 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -168,7 +168,7 @@ const struct dentry_operations hfsplus_dentry_operations = {
168}; 168};
169 169
170static struct dentry *hfsplus_file_lookup(struct inode *dir, 170static struct dentry *hfsplus_file_lookup(struct inode *dir,
171 struct dentry *dentry, struct nameidata *nd) 171 struct dentry *dentry, unsigned int flags)
172{ 172{
173 struct hfs_find_data fd; 173 struct hfs_find_data fd;
174 struct super_block *sb = dir->i_sb; 174 struct super_block *sb = dir->i_sb;
@@ -431,7 +431,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
431 sbi->file_count++; 431 sbi->file_count++;
432 insert_inode_hash(inode); 432 insert_inode_hash(inode);
433 mark_inode_dirty(inode); 433 mark_inode_dirty(inode);
434 sb->s_dirt = 1; 434 hfsplus_mark_mdb_dirty(sb);
435 435
436 return inode; 436 return inode;
437} 437}
@@ -442,7 +442,7 @@ void hfsplus_delete_inode(struct inode *inode)
442 442
443 if (S_ISDIR(inode->i_mode)) { 443 if (S_ISDIR(inode->i_mode)) {
444 HFSPLUS_SB(sb)->folder_count--; 444 HFSPLUS_SB(sb)->folder_count--;
445 sb->s_dirt = 1; 445 hfsplus_mark_mdb_dirty(sb);
446 return; 446 return;
447 } 447 }
448 HFSPLUS_SB(sb)->file_count--; 448 HFSPLUS_SB(sb)->file_count--;
@@ -455,7 +455,7 @@ void hfsplus_delete_inode(struct inode *inode)
455 inode->i_size = 0; 455 inode->i_size = 0;
456 hfsplus_file_truncate(inode); 456 hfsplus_file_truncate(inode);
457 } 457 }
458 sb->s_dirt = 1; 458 hfsplus_mark_mdb_dirty(sb);
459} 459}
460 460
461void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) 461void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork)
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index a9bca4b8768b..473332098013 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -124,7 +124,7 @@ static int hfsplus_system_write_inode(struct inode *inode)
124 124
125 if (fork->total_size != cpu_to_be64(inode->i_size)) { 125 if (fork->total_size != cpu_to_be64(inode->i_size)) {
126 set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); 126 set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags);
127 inode->i_sb->s_dirt = 1; 127 hfsplus_mark_mdb_dirty(inode->i_sb);
128 } 128 }
129 hfsplus_inode_write_fork(inode, fork); 129 hfsplus_inode_write_fork(inode, fork);
130 if (tree) 130 if (tree)
@@ -161,7 +161,7 @@ static void hfsplus_evict_inode(struct inode *inode)
161 } 161 }
162} 162}
163 163
164int hfsplus_sync_fs(struct super_block *sb, int wait) 164static int hfsplus_sync_fs(struct super_block *sb, int wait)
165{ 165{
166 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 166 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
167 struct hfsplus_vh *vhdr = sbi->s_vhdr; 167 struct hfsplus_vh *vhdr = sbi->s_vhdr;
@@ -171,9 +171,7 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
171 if (!wait) 171 if (!wait)
172 return 0; 172 return 0;
173 173
174 dprint(DBG_SUPER, "hfsplus_write_super\n"); 174 dprint(DBG_SUPER, "hfsplus_sync_fs\n");
175
176 sb->s_dirt = 0;
177 175
178 /* 176 /*
179 * Explicitly write out the special metadata inodes. 177 * Explicitly write out the special metadata inodes.
@@ -226,12 +224,34 @@ out:
226 return error; 224 return error;
227} 225}
228 226
229static void hfsplus_write_super(struct super_block *sb) 227static void delayed_sync_fs(struct work_struct *work)
230{ 228{
231 if (!(sb->s_flags & MS_RDONLY)) 229 struct hfsplus_sb_info *sbi;
232 hfsplus_sync_fs(sb, 1); 230
233 else 231 sbi = container_of(work, struct hfsplus_sb_info, sync_work.work);
234 sb->s_dirt = 0; 232
233 spin_lock(&sbi->work_lock);
234 sbi->work_queued = 0;
235 spin_unlock(&sbi->work_lock);
236
237 hfsplus_sync_fs(sbi->alloc_file->i_sb, 1);
238}
239
240void hfsplus_mark_mdb_dirty(struct super_block *sb)
241{
242 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
243 unsigned long delay;
244
245 if (sb->s_flags & MS_RDONLY)
246 return;
247
248 spin_lock(&sbi->work_lock);
249 if (!sbi->work_queued) {
250 delay = msecs_to_jiffies(dirty_writeback_interval * 10);
251 queue_delayed_work(system_long_wq, &sbi->sync_work, delay);
252 sbi->work_queued = 1;
253 }
254 spin_unlock(&sbi->work_lock);
235} 255}
236 256
237static void hfsplus_put_super(struct super_block *sb) 257static void hfsplus_put_super(struct super_block *sb)
@@ -240,8 +260,7 @@ static void hfsplus_put_super(struct super_block *sb)
240 260
241 dprint(DBG_SUPER, "hfsplus_put_super\n"); 261 dprint(DBG_SUPER, "hfsplus_put_super\n");
242 262
243 if (!sb->s_fs_info) 263 cancel_delayed_work_sync(&sbi->sync_work);
244 return;
245 264
246 if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { 265 if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) {
247 struct hfsplus_vh *vhdr = sbi->s_vhdr; 266 struct hfsplus_vh *vhdr = sbi->s_vhdr;
@@ -328,7 +347,6 @@ static const struct super_operations hfsplus_sops = {
328 .write_inode = hfsplus_write_inode, 347 .write_inode = hfsplus_write_inode,
329 .evict_inode = hfsplus_evict_inode, 348 .evict_inode = hfsplus_evict_inode,
330 .put_super = hfsplus_put_super, 349 .put_super = hfsplus_put_super,
331 .write_super = hfsplus_write_super,
332 .sync_fs = hfsplus_sync_fs, 350 .sync_fs = hfsplus_sync_fs,
333 .statfs = hfsplus_statfs, 351 .statfs = hfsplus_statfs,
334 .remount_fs = hfsplus_remount, 352 .remount_fs = hfsplus_remount,
@@ -355,6 +373,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
355 sb->s_fs_info = sbi; 373 sb->s_fs_info = sbi;
356 mutex_init(&sbi->alloc_mutex); 374 mutex_init(&sbi->alloc_mutex);
357 mutex_init(&sbi->vh_mutex); 375 mutex_init(&sbi->vh_mutex);
376 spin_lock_init(&sbi->work_lock);
377 INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
358 hfsplus_fill_defaults(sbi); 378 hfsplus_fill_defaults(sbi);
359 379
360 err = -EINVAL; 380 err = -EINVAL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2afa5bbccf9b..124146543aa7 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -553,7 +553,7 @@ static int read_name(struct inode *ino, char *name)
553} 553}
554 554
555int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 555int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
556 struct nameidata *nd) 556 bool excl)
557{ 557{
558 struct inode *inode; 558 struct inode *inode;
559 char *name; 559 char *name;
@@ -595,7 +595,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
595} 595}
596 596
597struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, 597struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
598 struct nameidata *nd) 598 unsigned int flags)
599{ 599{
600 struct inode *inode; 600 struct inode *inode;
601 char *name; 601 char *name;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index b8472f803f4e..78e12b2e0ea2 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -189,7 +189,7 @@ out:
189 * to tell read_inode to read fnode or not. 189 * to tell read_inode to read fnode or not.
190 */ 190 */
191 191
192struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 192struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
193{ 193{
194 const unsigned char *name = dentry->d_name.name; 194 const unsigned char *name = dentry->d_name.name;
195 unsigned len = dentry->d_name.len; 195 unsigned len = dentry->d_name.len;
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index c07ef1f1ced6..ac1ead194db5 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -220,7 +220,7 @@ extern const struct dentry_operations hpfs_dentry_operations;
220 220
221/* dir.c */ 221/* dir.c */
222 222
223struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *); 223struct dentry *hpfs_lookup(struct inode *, struct dentry *, unsigned int);
224extern const struct file_operations hpfs_dir_ops; 224extern const struct file_operations hpfs_dir_ops;
225 225
226/* dnode.c */ 226/* dnode.c */
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 9083ef8af58c..bc9082482f68 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -115,7 +115,7 @@ bail:
115 return err; 115 return err;
116} 116}
117 117
118static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) 118static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
119{ 119{
120 const unsigned char *name = dentry->d_name.name; 120 const unsigned char *name = dentry->d_name.name;
121 unsigned len = dentry->d_name.len; 121 unsigned len = dentry->d_name.len;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index d4f93b52cec5..c1dffe47fde2 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -138,7 +138,7 @@ static int file_removed(struct dentry *dentry, const char *file)
138} 138}
139 139
140static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, 140static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
141 struct nameidata *nd) 141 unsigned int flags)
142{ 142{
143 struct dentry *proc_dentry, *parent; 143 struct dentry *proc_dentry, *parent;
144 struct qstr *name = &dentry->d_name; 144 struct qstr *name = &dentry->d_name;
@@ -420,8 +420,7 @@ static int hppfs_open(struct inode *inode, struct file *file)
420{ 420{
421 const struct cred *cred = file->f_cred; 421 const struct cred *cred = file->f_cred;
422 struct hppfs_private *data; 422 struct hppfs_private *data;
423 struct vfsmount *proc_mnt; 423 struct path path;
424 struct dentry *proc_dentry;
425 char *host_file; 424 char *host_file;
426 int err, fd, type, filter; 425 int err, fd, type, filter;
427 426
@@ -434,12 +433,11 @@ static int hppfs_open(struct inode *inode, struct file *file)
434 if (host_file == NULL) 433 if (host_file == NULL)
435 goto out_free2; 434 goto out_free2;
436 435
437 proc_dentry = HPPFS_I(inode)->proc_dentry; 436 path.mnt = inode->i_sb->s_fs_info;
438 proc_mnt = inode->i_sb->s_fs_info; 437 path.dentry = HPPFS_I(inode)->proc_dentry;
439 438
440 /* XXX This isn't closed anywhere */ 439 /* XXX This isn't closed anywhere */
441 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), 440 data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
442 file_mode(file->f_mode), cred);
443 err = PTR_ERR(data->proc_file); 441 err = PTR_ERR(data->proc_file);
444 if (IS_ERR(data->proc_file)) 442 if (IS_ERR(data->proc_file))
445 goto out_free1; 443 goto out_free1;
@@ -484,8 +482,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
484{ 482{
485 const struct cred *cred = file->f_cred; 483 const struct cred *cred = file->f_cred;
486 struct hppfs_private *data; 484 struct hppfs_private *data;
487 struct vfsmount *proc_mnt; 485 struct path path;
488 struct dentry *proc_dentry;
489 int err; 486 int err;
490 487
491 err = -ENOMEM; 488 err = -ENOMEM;
@@ -493,10 +490,9 @@ static int hppfs_dir_open(struct inode *inode, struct file *file)
493 if (data == NULL) 490 if (data == NULL)
494 goto out; 491 goto out;
495 492
496 proc_dentry = HPPFS_I(inode)->proc_dentry; 493 path.mnt = inode->i_sb->s_fs_info;
497 proc_mnt = inode->i_sb->s_fs_info; 494 path.dentry = HPPFS_I(inode)->proc_dentry;
498 data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), 495 data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
499 file_mode(file->f_mode), cred);
500 err = PTR_ERR(data->proc_file); 496 err = PTR_ERR(data->proc_file);
501 if (IS_ERR(data->proc_file)) 497 if (IS_ERR(data->proc_file))
502 goto out_free; 498 goto out_free;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cc9281b6c628..e13e9bdb0bf5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -565,7 +565,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mod
565 return retval; 565 return retval;
566} 566}
567 567
568static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) 568static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
569{ 569{
570 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 570 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
571} 571}
diff --git a/fs/inode.c b/fs/inode.c
index c99163b1b310..775cbabd4fa5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -182,7 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
182 } 182 }
183 inode->i_private = NULL; 183 inode->i_private = NULL;
184 inode->i_mapping = mapping; 184 inode->i_mapping = mapping;
185 INIT_LIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ 185 INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */
186#ifdef CONFIG_FS_POSIX_ACL 186#ifdef CONFIG_FS_POSIX_ACL
187 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; 187 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
188#endif 188#endif
diff --git a/fs/internal.h b/fs/internal.h
index 18bc216ea09d..a6fd56c68b11 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -42,6 +42,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
42extern void __init chrdev_init(void); 42extern void __init chrdev_init(void);
43 43
44/* 44/*
45 * namei.c
46 */
47extern int __inode_permission(struct inode *, int);
48
49/*
45 * namespace.c 50 * namespace.c
46 */ 51 */
47extern int copy_mount_options(const void __user *, unsigned long *); 52extern int copy_mount_options(const void __user *, unsigned long *);
@@ -50,8 +55,6 @@ extern int copy_mount_string(const void __user *, char **);
50extern struct vfsmount *lookup_mnt(struct path *); 55extern struct vfsmount *lookup_mnt(struct path *);
51extern int finish_automount(struct vfsmount *, struct path *); 56extern int finish_automount(struct vfsmount *, struct path *);
52 57
53extern void mnt_make_longterm(struct vfsmount *);
54extern void mnt_make_shortterm(struct vfsmount *);
55extern int sb_prepare_remount_readonly(struct super_block *); 58extern int sb_prepare_remount_readonly(struct super_block *);
56 59
57extern void __init mnt_init(void); 60extern void __init mnt_init(void);
@@ -84,9 +87,6 @@ extern struct super_block *user_get_super(dev_t);
84/* 87/*
85 * open.c 88 * open.c
86 */ 89 */
87struct nameidata;
88extern struct file *nameidata_to_filp(struct nameidata *);
89extern void release_open_intent(struct nameidata *);
90struct open_flags { 90struct open_flags {
91 int open_flag; 91 int open_flag;
92 umode_t mode; 92 umode_t mode;
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index aa4356d09eee..1d3804492aa7 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -134,6 +134,7 @@ isofs_export_encode_fh(struct inode *inode,
134 len = 3; 134 len = 3;
135 fh32[0] = ei->i_iget5_block; 135 fh32[0] = ei->i_iget5_block;
136 fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ 136 fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */
137 fh16[3] = 0; /* avoid leaking uninitialized data */
137 fh32[2] = inode->i_generation; 138 fh32[2] = inode->i_generation;
138 if (parent) { 139 if (parent) {
139 struct iso_inode_info *eparent; 140 struct iso_inode_info *eparent;
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 0e73f63d9274..3620ad1ea9bc 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -114,7 +114,7 @@ extern int isofs_name_translate(struct iso_directory_record *, char *, struct in
114int get_joliet_filename(struct iso_directory_record *, unsigned char *, struct inode *); 114int get_joliet_filename(struct iso_directory_record *, unsigned char *, struct inode *);
115int get_acorn_filename(struct iso_directory_record *, char *, struct inode *); 115int get_acorn_filename(struct iso_directory_record *, char *, struct inode *);
116 116
117extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct nameidata *); 117extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int flags);
118extern struct buffer_head *isofs_bread(struct inode *, sector_t); 118extern struct buffer_head *isofs_bread(struct inode *, sector_t);
119extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); 119extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
120 120
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 1e2946f2a69e..c167028844ed 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -163,7 +163,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
163 return 0; 163 return 0;
164} 164}
165 165
166struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 166struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
167{ 167{
168 int found; 168 int found;
169 unsigned long uninitialized_var(block); 169 unsigned long uninitialized_var(block);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 008bf062fd26..a748fe21465a 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -265,8 +265,11 @@ int journal_recover(journal_t *journal)
265 if (!err) 265 if (!err)
266 err = err2; 266 err = err2;
267 /* Flush disk caches to get replayed data on the permanent storage */ 267 /* Flush disk caches to get replayed data on the permanent storage */
268 if (journal->j_flags & JFS_BARRIER) 268 if (journal->j_flags & JFS_BARRIER) {
269 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 269 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
270 if (!err)
271 err = err2;
272 }
270 273
271 return err; 274 return err;
272} 275}
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index b56018896d5e..ad7774d32095 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -25,9 +25,9 @@
25static int jffs2_readdir (struct file *, void *, filldir_t); 25static int jffs2_readdir (struct file *, void *, filldir_t);
26 26
27static int jffs2_create (struct inode *,struct dentry *,umode_t, 27static int jffs2_create (struct inode *,struct dentry *,umode_t,
28 struct nameidata *); 28 bool);
29static struct dentry *jffs2_lookup (struct inode *,struct dentry *, 29static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
30 struct nameidata *); 30 unsigned int);
31static int jffs2_link (struct dentry *,struct inode *,struct dentry *); 31static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
32static int jffs2_unlink (struct inode *,struct dentry *); 32static int jffs2_unlink (struct inode *,struct dentry *);
33static int jffs2_symlink (struct inode *,struct dentry *,const char *); 33static int jffs2_symlink (struct inode *,struct dentry *,const char *);
@@ -74,7 +74,7 @@ const struct inode_operations jffs2_dir_inode_operations =
74 nice and simple 74 nice and simple
75*/ 75*/
76static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, 76static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
77 struct nameidata *nd) 77 unsigned int flags)
78{ 78{
79 struct jffs2_inode_info *dir_f; 79 struct jffs2_inode_info *dir_f;
80 struct jffs2_full_dirent *fd = NULL, *fd_list; 80 struct jffs2_full_dirent *fd = NULL, *fd_list;
@@ -175,7 +175,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
175 175
176 176
177static int jffs2_create(struct inode *dir_i, struct dentry *dentry, 177static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
178 umode_t mode, struct nameidata *nd) 178 umode_t mode, bool excl)
179{ 179{
180 struct jffs2_raw_inode *ri; 180 struct jffs2_raw_inode *ri;
181 struct jffs2_inode_info *f, *dir_f; 181 struct jffs2_inode_info *f, *dir_f;
@@ -226,8 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
226 __func__, inode->i_ino, inode->i_mode, inode->i_nlink, 226 __func__, inode->i_ino, inode->i_mode, inode->i_nlink,
227 f->inocache->pino_nlink, inode->i_mapping->nrpages); 227 f->inocache->pino_nlink, inode->i_mapping->nrpages);
228 228
229 d_instantiate(dentry, inode);
230 unlock_new_inode(inode); 229 unlock_new_inode(inode);
230 d_instantiate(dentry, inode);
231 return 0; 231 return 0;
232 232
233 fail: 233 fail:
@@ -446,8 +446,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
446 mutex_unlock(&dir_f->sem); 446 mutex_unlock(&dir_f->sem);
447 jffs2_complete_reservation(c); 447 jffs2_complete_reservation(c);
448 448
449 d_instantiate(dentry, inode);
450 unlock_new_inode(inode); 449 unlock_new_inode(inode);
450 d_instantiate(dentry, inode);
451 return 0; 451 return 0;
452 452
453 fail: 453 fail:
@@ -591,8 +591,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode
591 mutex_unlock(&dir_f->sem); 591 mutex_unlock(&dir_f->sem);
592 jffs2_complete_reservation(c); 592 jffs2_complete_reservation(c);
593 593
594 d_instantiate(dentry, inode);
595 unlock_new_inode(inode); 594 unlock_new_inode(inode);
595 d_instantiate(dentry, inode);
596 return 0; 596 return 0;
597 597
598 fail: 598 fail:
@@ -766,8 +766,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
766 mutex_unlock(&dir_f->sem); 766 mutex_unlock(&dir_f->sem);
767 jffs2_complete_reservation(c); 767 jffs2_complete_reservation(c);
768 768
769 d_instantiate(dentry, inode);
770 unlock_new_inode(inode); 769 unlock_new_inode(inode);
770 d_instantiate(dentry, inode);
771 return 0; 771 return 0;
772 772
773 fail: 773 fail:
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 07c91ca6017d..3b91a7ad6086 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -73,7 +73,7 @@ static inline void free_ea_wmap(struct inode *inode)
73 * 73 *
74 */ 74 */
75static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, 75static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
76 struct nameidata *nd) 76 bool excl)
77{ 77{
78 int rc = 0; 78 int rc = 0;
79 tid_t tid; /* transaction id */ 79 tid_t tid; /* transaction id */
@@ -176,8 +176,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
176 unlock_new_inode(ip); 176 unlock_new_inode(ip);
177 iput(ip); 177 iput(ip);
178 } else { 178 } else {
179 d_instantiate(dentry, ip);
180 unlock_new_inode(ip); 179 unlock_new_inode(ip);
180 d_instantiate(dentry, ip);
181 } 181 }
182 182
183 out2: 183 out2:
@@ -309,8 +309,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
309 unlock_new_inode(ip); 309 unlock_new_inode(ip);
310 iput(ip); 310 iput(ip);
311 } else { 311 } else {
312 d_instantiate(dentry, ip);
313 unlock_new_inode(ip); 312 unlock_new_inode(ip);
313 d_instantiate(dentry, ip);
314 } 314 }
315 315
316 out2: 316 out2:
@@ -1043,8 +1043,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
1043 unlock_new_inode(ip); 1043 unlock_new_inode(ip);
1044 iput(ip); 1044 iput(ip);
1045 } else { 1045 } else {
1046 d_instantiate(dentry, ip);
1047 unlock_new_inode(ip); 1046 unlock_new_inode(ip);
1047 d_instantiate(dentry, ip);
1048 } 1048 }
1049 1049
1050 out2: 1050 out2:
@@ -1424,8 +1424,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1424 unlock_new_inode(ip); 1424 unlock_new_inode(ip);
1425 iput(ip); 1425 iput(ip);
1426 } else { 1426 } else {
1427 d_instantiate(dentry, ip);
1428 unlock_new_inode(ip); 1427 unlock_new_inode(ip);
1428 d_instantiate(dentry, ip);
1429 } 1429 }
1430 1430
1431 out1: 1431 out1:
@@ -1436,7 +1436,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1436 return rc; 1436 return rc;
1437} 1437}
1438 1438
1439static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) 1439static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags)
1440{ 1440{
1441 struct btstack btstack; 1441 struct btstack btstack;
1442 ino_t inum; 1442 ino_t inum;
@@ -1570,7 +1570,7 @@ out:
1570 return result; 1570 return result;
1571} 1571}
1572 1572
1573static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1573static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags)
1574{ 1574{
1575 /* 1575 /*
1576 * This is not negative dentry. Always valid. 1576 * This is not negative dentry. Always valid.
@@ -1589,7 +1589,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1589 * This may be nfsd (or something), anyway, we can't see the 1589 * This may be nfsd (or something), anyway, we can't see the
1590 * intent of this. So, since this can be for creation, drop it. 1590 * intent of this. So, since this can be for creation, drop it.
1591 */ 1591 */
1592 if (!nd) 1592 if (!flags)
1593 return 0; 1593 return 0;
1594 1594
1595 /* 1595 /*
@@ -1597,7 +1597,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1597 * case sensitive name which is specified by user if this is 1597 * case sensitive name which is specified by user if this is
1598 * for creation. 1598 * for creation.
1599 */ 1599 */
1600 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 1600 if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1601 return 0; 1601 return 0;
1602 return 1; 1602 return 1;
1603} 1603}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4a82950f412f..c55c7452d285 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -601,6 +601,11 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
601 601
602 /* log == NULL indicates read-only mount */ 602 /* log == NULL indicates read-only mount */
603 if (log) { 603 if (log) {
604 /*
605 * Write quota structures to quota file, sync_blockdev() will
606 * write them to disk later
607 */
608 dquot_writeback_dquots(sb, -1);
604 jfs_flush_journal(log, wait); 609 jfs_flush_journal(log, wait);
605 jfs_syncpt(log, 0); 610 jfs_syncpt(log, 0);
606 } 611 }
diff --git a/fs/libfs.c b/fs/libfs.c
index f86ec27a4230..a74cb1725ac6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -53,7 +53,7 @@ static int simple_delete_dentry(const struct dentry *dentry)
53 * Lookup the data. This is trivial - if the dentry didn't already 53 * Lookup the data. This is trivial - if the dentry didn't already
54 * exist, we know it is negative. Set d_op to delete negative dentries. 54 * exist, we know it is negative. Set d_op to delete negative dentries.
55 */ 55 */
56struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 56struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
57{ 57{
58 static const struct dentry_operations simple_dentry_operations = { 58 static const struct dentry_operations simple_dentry_operations = {
59 .d_delete = simple_delete_dentry, 59 .d_delete = simple_delete_dentry,
@@ -222,15 +222,15 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
222 const struct super_operations *ops, 222 const struct super_operations *ops,
223 const struct dentry_operations *dops, unsigned long magic) 223 const struct dentry_operations *dops, unsigned long magic)
224{ 224{
225 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 225 struct super_block *s;
226 struct dentry *dentry; 226 struct dentry *dentry;
227 struct inode *root; 227 struct inode *root;
228 struct qstr d_name = QSTR_INIT(name, strlen(name)); 228 struct qstr d_name = QSTR_INIT(name, strlen(name));
229 229
230 s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL);
230 if (IS_ERR(s)) 231 if (IS_ERR(s))
231 return ERR_CAST(s); 232 return ERR_CAST(s);
232 233
233 s->s_flags = MS_NOUSER;
234 s->s_maxbytes = MAX_LFS_FILESIZE; 234 s->s_maxbytes = MAX_LFS_FILESIZE;
235 s->s_blocksize = PAGE_SIZE; 235 s->s_blocksize = PAGE_SIZE;
236 s->s_blocksize_bits = PAGE_SHIFT; 236 s->s_blocksize_bits = PAGE_SHIFT;
diff --git a/fs/locks.c b/fs/locks.c
index fce6238d52c1..82c353304f9e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -308,7 +308,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,
308 return 0; 308 return 0;
309} 309}
310 310
311static int assign_type(struct file_lock *fl, int type) 311static int assign_type(struct file_lock *fl, long type)
312{ 312{
313 switch (type) { 313 switch (type) {
314 case F_RDLCK: 314 case F_RDLCK:
@@ -445,7 +445,7 @@ static const struct lock_manager_operations lease_manager_ops = {
445/* 445/*
446 * Initialize a lease, use the default lock manager operations 446 * Initialize a lease, use the default lock manager operations
447 */ 447 */
448static int lease_init(struct file *filp, int type, struct file_lock *fl) 448static int lease_init(struct file *filp, long type, struct file_lock *fl)
449 { 449 {
450 if (assign_type(fl, type) != 0) 450 if (assign_type(fl, type) != 0)
451 return -EINVAL; 451 return -EINVAL;
@@ -463,7 +463,7 @@ static int lease_init(struct file *filp, int type, struct file_lock *fl)
463} 463}
464 464
465/* Allocate a file_lock initialised to this type of lease */ 465/* Allocate a file_lock initialised to this type of lease */
466static struct file_lock *lease_alloc(struct file *filp, int type) 466static struct file_lock *lease_alloc(struct file *filp, long type)
467{ 467{
468 struct file_lock *fl = locks_alloc_lock(); 468 struct file_lock *fl = locks_alloc_lock();
469 int error = -ENOMEM; 469 int error = -ENOMEM;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index bea5d1b9954b..26e4a941532f 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -349,7 +349,7 @@ static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name)
349} 349}
350 350
351static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry, 351static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
352 struct nameidata *nd) 352 unsigned int flags)
353{ 353{
354 struct page *page; 354 struct page *page;
355 struct logfs_disk_dentry *dd; 355 struct logfs_disk_dentry *dd;
@@ -502,7 +502,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
502} 502}
503 503
504static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 504static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
505 struct nameidata *nd) 505 bool excl)
506{ 506{
507 struct inode *inode; 507 struct inode *inode;
508 508
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 97bca623d893..345c24b8a6f8 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -519,7 +519,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
519 log_super("LogFS: Start mount %x\n", mount_count++); 519 log_super("LogFS: Start mount %x\n", mount_count++);
520 520
521 err = -EINVAL; 521 err = -EINVAL;
522 sb = sget(type, logfs_sb_test, logfs_sb_set, super); 522 sb = sget(type, logfs_sb_test, logfs_sb_set, flags | MS_NOATIME, super);
523 if (IS_ERR(sb)) { 523 if (IS_ERR(sb)) {
524 super->s_devops->put_device(super); 524 super->s_devops->put_device(super);
525 kfree(super); 525 kfree(super);
@@ -542,7 +542,6 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super,
542 sb->s_maxbytes = (1ull << 43) - 1; 542 sb->s_maxbytes = (1ull << 43) - 1;
543 sb->s_max_links = LOGFS_LINK_MAX; 543 sb->s_max_links = LOGFS_LINK_MAX;
544 sb->s_op = &logfs_super_operations; 544 sb->s_op = &logfs_super_operations;
545 sb->s_flags = flags | MS_NOATIME;
546 545
547 err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY); 546 err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
548 if (err) 547 if (err)
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 2d0ee1786305..0db73d9dd668 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,7 +18,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
18 return err; 18 return err;
19} 19}
20 20
21static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 21static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
22{ 22{
23 struct inode * inode = NULL; 23 struct inode * inode = NULL;
24 ino_t ino; 24 ino_t ino;
@@ -55,7 +55,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode,
55} 55}
56 56
57static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, 57static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
58 struct nameidata *nd) 58 bool excl)
59{ 59{
60 return minix_mknod(dir, dentry, mode, 0); 60 return minix_mknod(dir, dentry, mode, 0);
61} 61}
diff --git a/fs/mount.h b/fs/mount.h
index 4ef36d93e5a2..4f291f9de641 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -22,7 +22,6 @@ struct mount {
22 struct vfsmount mnt; 22 struct vfsmount mnt;
23#ifdef CONFIG_SMP 23#ifdef CONFIG_SMP
24 struct mnt_pcp __percpu *mnt_pcp; 24 struct mnt_pcp __percpu *mnt_pcp;
25 atomic_t mnt_longterm; /* how many of the refs are longterm */
26#else 25#else
27 int mnt_count; 26 int mnt_count;
28 int mnt_writers; 27 int mnt_writers;
@@ -49,6 +48,8 @@ struct mount {
49 int mnt_ghosts; 48 int mnt_ghosts;
50}; 49};
51 50
51#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */
52
52static inline struct mount *real_mount(struct vfsmount *mnt) 53static inline struct mount *real_mount(struct vfsmount *mnt)
53{ 54{
54 return container_of(mnt, struct mount, mnt); 55 return container_of(mnt, struct mount, mnt);
@@ -59,6 +60,12 @@ static inline int mnt_has_parent(struct mount *mnt)
59 return mnt != mnt->mnt_parent; 60 return mnt != mnt->mnt_parent;
60} 61}
61 62
63static inline int is_mounted(struct vfsmount *mnt)
64{
65 /* neither detached nor internal? */
66 return !IS_ERR_OR_NULL(real_mount(mnt));
67}
68
62extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 69extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
63 70
64static inline void get_mnt_ns(struct mnt_namespace *ns) 71static inline void get_mnt_ns(struct mnt_namespace *ns)
@@ -67,10 +74,12 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
67} 74}
68 75
69struct proc_mounts { 76struct proc_mounts {
70 struct seq_file m; /* must be the first element */ 77 struct seq_file m;
71 struct mnt_namespace *ns; 78 struct mnt_namespace *ns;
72 struct path root; 79 struct path root;
73 int (*show)(struct seq_file *, struct vfsmount *); 80 int (*show)(struct seq_file *, struct vfsmount *);
74}; 81};
75 82
83#define proc_mounts(p) (container_of((p), struct proc_mounts, m))
84
76extern const struct seq_operations mounts_op; 85extern const struct seq_operations mounts_op;
diff --git a/fs/namei.c b/fs/namei.c
index 7d694194024a..2ccc35c4dc24 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -315,31 +315,22 @@ static inline int do_inode_permission(struct inode *inode, int mask)
315} 315}
316 316
317/** 317/**
318 * inode_permission - check for access rights to a given inode 318 * __inode_permission - Check for access rights to a given inode
319 * @inode: inode to check permission on 319 * @inode: Inode to check permission on
320 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) 320 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
321 * 321 *
322 * Used to check for read/write/execute permissions on an inode. 322 * Check for read/write/execute permissions on an inode.
323 * We use "fsuid" for this, letting us set arbitrary permissions
324 * for filesystem access without changing the "normal" uids which
325 * are used for other things.
326 * 323 *
327 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. 324 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
325 *
326 * This does not check for a read-only file system. You probably want
327 * inode_permission().
328 */ 328 */
329int inode_permission(struct inode *inode, int mask) 329int __inode_permission(struct inode *inode, int mask)
330{ 330{
331 int retval; 331 int retval;
332 332
333 if (unlikely(mask & MAY_WRITE)) { 333 if (unlikely(mask & MAY_WRITE)) {
334 umode_t mode = inode->i_mode;
335
336 /*
337 * Nobody gets write access to a read-only fs.
338 */
339 if (IS_RDONLY(inode) &&
340 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
341 return -EROFS;
342
343 /* 334 /*
344 * Nobody gets write access to an immutable file. 335 * Nobody gets write access to an immutable file.
345 */ 336 */
@@ -359,6 +350,47 @@ int inode_permission(struct inode *inode, int mask)
359} 350}
360 351
361/** 352/**
353 * sb_permission - Check superblock-level permissions
354 * @sb: Superblock of inode to check permission on
355 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
356 *
357 * Separate out file-system wide checks from inode-specific permission checks.
358 */
359static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
360{
361 if (unlikely(mask & MAY_WRITE)) {
362 umode_t mode = inode->i_mode;
363
364 /* Nobody gets write access to a read-only fs. */
365 if ((sb->s_flags & MS_RDONLY) &&
366 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
367 return -EROFS;
368 }
369 return 0;
370}
371
372/**
373 * inode_permission - Check for access rights to a given inode
374 * @inode: Inode to check permission on
375 * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
376 *
377 * Check for read/write/execute permissions on an inode. We use fs[ug]id for
378 * this, letting us set arbitrary permissions for filesystem access without
379 * changing the "normal" UIDs which are used for other things.
380 *
381 * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
382 */
383int inode_permission(struct inode *inode, int mask)
384{
385 int retval;
386
387 retval = sb_permission(inode->i_sb, inode, mask);
388 if (retval)
389 return retval;
390 return __inode_permission(inode, mask);
391}
392
393/**
362 * path_get - get a reference to a path 394 * path_get - get a reference to a path
363 * @path: path to get the reference to 395 * @path: path to get the reference to
364 * 396 *
@@ -395,6 +427,18 @@ EXPORT_SYMBOL(path_put);
395 * to restart the path walk from the beginning in ref-walk mode. 427 * to restart the path walk from the beginning in ref-walk mode.
396 */ 428 */
397 429
430static inline void lock_rcu_walk(void)
431{
432 br_read_lock(&vfsmount_lock);
433 rcu_read_lock();
434}
435
436static inline void unlock_rcu_walk(void)
437{
438 rcu_read_unlock();
439 br_read_unlock(&vfsmount_lock);
440}
441
398/** 442/**
399 * unlazy_walk - try to switch to ref-walk mode. 443 * unlazy_walk - try to switch to ref-walk mode.
400 * @nd: nameidata pathwalk data 444 * @nd: nameidata pathwalk data
@@ -448,8 +492,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
448 } 492 }
449 mntget(nd->path.mnt); 493 mntget(nd->path.mnt);
450 494
451 rcu_read_unlock(); 495 unlock_rcu_walk();
452 br_read_unlock(&vfsmount_lock);
453 nd->flags &= ~LOOKUP_RCU; 496 nd->flags &= ~LOOKUP_RCU;
454 return 0; 497 return 0;
455 498
@@ -463,25 +506,9 @@ err_root:
463 return -ECHILD; 506 return -ECHILD;
464} 507}
465 508
466/** 509static inline int d_revalidate(struct dentry *dentry, unsigned int flags)
467 * release_open_intent - free up open intent resources
468 * @nd: pointer to nameidata
469 */
470void release_open_intent(struct nameidata *nd)
471{ 510{
472 struct file *file = nd->intent.open.file; 511 return dentry->d_op->d_revalidate(dentry, flags);
473
474 if (file && !IS_ERR(file)) {
475 if (file->f_path.dentry == NULL)
476 put_filp(file);
477 else
478 fput(file);
479 }
480}
481
482static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
483{
484 return dentry->d_op->d_revalidate(dentry, nd);
485} 512}
486 513
487/** 514/**
@@ -506,15 +533,13 @@ static int complete_walk(struct nameidata *nd)
506 spin_lock(&dentry->d_lock); 533 spin_lock(&dentry->d_lock);
507 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { 534 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
508 spin_unlock(&dentry->d_lock); 535 spin_unlock(&dentry->d_lock);
509 rcu_read_unlock(); 536 unlock_rcu_walk();
510 br_read_unlock(&vfsmount_lock);
511 return -ECHILD; 537 return -ECHILD;
512 } 538 }
513 BUG_ON(nd->inode != dentry->d_inode); 539 BUG_ON(nd->inode != dentry->d_inode);
514 spin_unlock(&dentry->d_lock); 540 spin_unlock(&dentry->d_lock);
515 mntget(nd->path.mnt); 541 mntget(nd->path.mnt);
516 rcu_read_unlock(); 542 unlock_rcu_walk();
517 br_read_unlock(&vfsmount_lock);
518 } 543 }
519 544
520 if (likely(!(nd->flags & LOOKUP_JUMPED))) 545 if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -527,7 +552,7 @@ static int complete_walk(struct nameidata *nd)
527 return 0; 552 return 0;
528 553
529 /* Note: we do not d_invalidate() */ 554 /* Note: we do not d_invalidate() */
530 status = d_revalidate(dentry, nd); 555 status = d_revalidate(dentry, nd->flags);
531 if (status > 0) 556 if (status > 0)
532 return 0; 557 return 0;
533 558
@@ -602,10 +627,25 @@ static inline void path_to_nameidata(const struct path *path,
602 nd->path.dentry = path->dentry; 627 nd->path.dentry = path->dentry;
603} 628}
604 629
630/*
631 * Helper to directly jump to a known parsed path from ->follow_link,
632 * caller must have taken a reference to path beforehand.
633 */
634void nd_jump_link(struct nameidata *nd, struct path *path)
635{
636 path_put(&nd->path);
637
638 nd->path = *path;
639 nd->inode = nd->path.dentry->d_inode;
640 nd->flags |= LOOKUP_JUMPED;
641
642 BUG_ON(nd->inode->i_op->follow_link);
643}
644
605static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) 645static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
606{ 646{
607 struct inode *inode = link->dentry->d_inode; 647 struct inode *inode = link->dentry->d_inode;
608 if (!IS_ERR(cookie) && inode->i_op->put_link) 648 if (inode->i_op->put_link)
609 inode->i_op->put_link(link->dentry, nd, cookie); 649 inode->i_op->put_link(link->dentry, nd, cookie);
610 path_put(link); 650 path_put(link);
611} 651}
@@ -613,19 +653,19 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
613static __always_inline int 653static __always_inline int
614follow_link(struct path *link, struct nameidata *nd, void **p) 654follow_link(struct path *link, struct nameidata *nd, void **p)
615{ 655{
616 int error;
617 struct dentry *dentry = link->dentry; 656 struct dentry *dentry = link->dentry;
657 int error;
658 char *s;
618 659
619 BUG_ON(nd->flags & LOOKUP_RCU); 660 BUG_ON(nd->flags & LOOKUP_RCU);
620 661
621 if (link->mnt == nd->path.mnt) 662 if (link->mnt == nd->path.mnt)
622 mntget(link->mnt); 663 mntget(link->mnt);
623 664
624 if (unlikely(current->total_link_count >= 40)) { 665 error = -ELOOP;
625 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ 666 if (unlikely(current->total_link_count >= 40))
626 path_put(&nd->path); 667 goto out_put_nd_path;
627 return -ELOOP; 668
628 }
629 cond_resched(); 669 cond_resched();
630 current->total_link_count++; 670 current->total_link_count++;
631 671
@@ -633,30 +673,28 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
633 nd_set_link(nd, NULL); 673 nd_set_link(nd, NULL);
634 674
635 error = security_inode_follow_link(link->dentry, nd); 675 error = security_inode_follow_link(link->dentry, nd);
636 if (error) { 676 if (error)
637 *p = ERR_PTR(error); /* no ->put_link(), please */ 677 goto out_put_nd_path;
638 path_put(&nd->path);
639 return error;
640 }
641 678
642 nd->last_type = LAST_BIND; 679 nd->last_type = LAST_BIND;
643 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 680 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
644 error = PTR_ERR(*p); 681 error = PTR_ERR(*p);
645 if (!IS_ERR(*p)) { 682 if (IS_ERR(*p))
646 char *s = nd_get_link(nd); 683 goto out_put_nd_path;
647 error = 0; 684
648 if (s) 685 error = 0;
649 error = __vfs_follow_link(nd, s); 686 s = nd_get_link(nd);
650 else if (nd->last_type == LAST_BIND) { 687 if (s) {
651 nd->flags |= LOOKUP_JUMPED; 688 error = __vfs_follow_link(nd, s);
652 nd->inode = nd->path.dentry->d_inode; 689 if (unlikely(error))
653 if (nd->inode->i_op->follow_link) { 690 put_link(nd, link, *p);
654 /* stepped on a _really_ weird one */
655 path_put(&nd->path);
656 error = -ELOOP;
657 }
658 }
659 } 691 }
692
693 return error;
694
695out_put_nd_path:
696 path_put(&nd->path);
697 path_put(link);
660 return error; 698 return error;
661} 699}
662 700
@@ -675,6 +713,16 @@ static int follow_up_rcu(struct path *path)
675 return 1; 713 return 1;
676} 714}
677 715
716/*
717 * follow_up - Find the mountpoint of path's vfsmount
718 *
719 * Given a path, find the mountpoint of its source file system.
720 * Replace @path with the path of the mountpoint in the parent mount.
721 * Up is towards /.
722 *
723 * Return 1 if we went up a level and 0 if we were already at the
724 * root.
725 */
678int follow_up(struct path *path) 726int follow_up(struct path *path)
679{ 727{
680 struct mount *mnt = real_mount(path->mnt); 728 struct mount *mnt = real_mount(path->mnt);
@@ -683,7 +731,7 @@ int follow_up(struct path *path)
683 731
684 br_read_lock(&vfsmount_lock); 732 br_read_lock(&vfsmount_lock);
685 parent = mnt->mnt_parent; 733 parent = mnt->mnt_parent;
686 if (&parent->mnt == path->mnt) { 734 if (parent == mnt) {
687 br_read_unlock(&vfsmount_lock); 735 br_read_unlock(&vfsmount_lock);
688 return 0; 736 return 0;
689 } 737 }
@@ -946,8 +994,7 @@ failed:
946 nd->flags &= ~LOOKUP_RCU; 994 nd->flags &= ~LOOKUP_RCU;
947 if (!(nd->flags & LOOKUP_ROOT)) 995 if (!(nd->flags & LOOKUP_ROOT))
948 nd->root.mnt = NULL; 996 nd->root.mnt = NULL;
949 rcu_read_unlock(); 997 unlock_rcu_walk();
950 br_read_unlock(&vfsmount_lock);
951 return -ECHILD; 998 return -ECHILD;
952} 999}
953 1000
@@ -1048,7 +1095,7 @@ static void follow_dotdot(struct nameidata *nd)
1048 * dir->d_inode->i_mutex must be held 1095 * dir->d_inode->i_mutex must be held
1049 */ 1096 */
1050static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, 1097static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
1051 struct nameidata *nd, bool *need_lookup) 1098 unsigned int flags, bool *need_lookup)
1052{ 1099{
1053 struct dentry *dentry; 1100 struct dentry *dentry;
1054 int error; 1101 int error;
@@ -1059,7 +1106,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
1059 if (d_need_lookup(dentry)) { 1106 if (d_need_lookup(dentry)) {
1060 *need_lookup = true; 1107 *need_lookup = true;
1061 } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { 1108 } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
1062 error = d_revalidate(dentry, nd); 1109 error = d_revalidate(dentry, flags);
1063 if (unlikely(error <= 0)) { 1110 if (unlikely(error <= 0)) {
1064 if (error < 0) { 1111 if (error < 0) {
1065 dput(dentry); 1112 dput(dentry);
@@ -1089,7 +1136,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
1089 * dir->d_inode->i_mutex must be held 1136 * dir->d_inode->i_mutex must be held
1090 */ 1137 */
1091static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, 1138static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
1092 struct nameidata *nd) 1139 unsigned int flags)
1093{ 1140{
1094 struct dentry *old; 1141 struct dentry *old;
1095 1142
@@ -1099,7 +1146,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
1099 return ERR_PTR(-ENOENT); 1146 return ERR_PTR(-ENOENT);
1100 } 1147 }
1101 1148
1102 old = dir->i_op->lookup(dir, dentry, nd); 1149 old = dir->i_op->lookup(dir, dentry, flags);
1103 if (unlikely(old)) { 1150 if (unlikely(old)) {
1104 dput(dentry); 1151 dput(dentry);
1105 dentry = old; 1152 dentry = old;
@@ -1108,16 +1155,16 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
1108} 1155}
1109 1156
1110static struct dentry *__lookup_hash(struct qstr *name, 1157static struct dentry *__lookup_hash(struct qstr *name,
1111 struct dentry *base, struct nameidata *nd) 1158 struct dentry *base, unsigned int flags)
1112{ 1159{
1113 bool need_lookup; 1160 bool need_lookup;
1114 struct dentry *dentry; 1161 struct dentry *dentry;
1115 1162
1116 dentry = lookup_dcache(name, base, nd, &need_lookup); 1163 dentry = lookup_dcache(name, base, flags, &need_lookup);
1117 if (!need_lookup) 1164 if (!need_lookup)
1118 return dentry; 1165 return dentry;
1119 1166
1120 return lookup_real(base->d_inode, dentry, nd); 1167 return lookup_real(base->d_inode, dentry, flags);
1121} 1168}
1122 1169
1123/* 1170/*
@@ -1167,7 +1214,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name,
1167 if (unlikely(d_need_lookup(dentry))) 1214 if (unlikely(d_need_lookup(dentry)))
1168 goto unlazy; 1215 goto unlazy;
1169 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1216 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1170 status = d_revalidate(dentry, nd); 1217 status = d_revalidate(dentry, nd->flags);
1171 if (unlikely(status <= 0)) { 1218 if (unlikely(status <= 0)) {
1172 if (status != -ECHILD) 1219 if (status != -ECHILD)
1173 need_reval = 0; 1220 need_reval = 0;
@@ -1197,7 +1244,7 @@ unlazy:
1197 } 1244 }
1198 1245
1199 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) 1246 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1200 status = d_revalidate(dentry, nd); 1247 status = d_revalidate(dentry, nd->flags);
1201 if (unlikely(status <= 0)) { 1248 if (unlikely(status <= 0)) {
1202 if (status < 0) { 1249 if (status < 0) {
1203 dput(dentry); 1250 dput(dentry);
@@ -1236,7 +1283,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name,
1236 BUG_ON(nd->inode != parent->d_inode); 1283 BUG_ON(nd->inode != parent->d_inode);
1237 1284
1238 mutex_lock(&parent->d_inode->i_mutex); 1285 mutex_lock(&parent->d_inode->i_mutex);
1239 dentry = __lookup_hash(name, parent, nd); 1286 dentry = __lookup_hash(name, parent, nd->flags);
1240 mutex_unlock(&parent->d_inode->i_mutex); 1287 mutex_unlock(&parent->d_inode->i_mutex);
1241 if (IS_ERR(dentry)) 1288 if (IS_ERR(dentry))
1242 return PTR_ERR(dentry); 1289 return PTR_ERR(dentry);
@@ -1284,8 +1331,7 @@ static void terminate_walk(struct nameidata *nd)
1284 nd->flags &= ~LOOKUP_RCU; 1331 nd->flags &= ~LOOKUP_RCU;
1285 if (!(nd->flags & LOOKUP_ROOT)) 1332 if (!(nd->flags & LOOKUP_ROOT))
1286 nd->root.mnt = NULL; 1333 nd->root.mnt = NULL;
1287 rcu_read_unlock(); 1334 unlock_rcu_walk();
1288 br_read_unlock(&vfsmount_lock);
1289 } 1335 }
1290} 1336}
1291 1337
@@ -1383,9 +1429,10 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
1383 void *cookie; 1429 void *cookie;
1384 1430
1385 res = follow_link(&link, nd, &cookie); 1431 res = follow_link(&link, nd, &cookie);
1386 if (!res) 1432 if (res)
1387 res = walk_component(nd, path, &nd->last, 1433 break;
1388 nd->last_type, LOOKUP_FOLLOW); 1434 res = walk_component(nd, path, &nd->last,
1435 nd->last_type, LOOKUP_FOLLOW);
1389 put_link(nd, &link, cookie); 1436 put_link(nd, &link, cookie);
1390 } while (res > 0); 1437 } while (res > 0);
1391 1438
@@ -1651,8 +1698,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1651 nd->path = nd->root; 1698 nd->path = nd->root;
1652 nd->inode = inode; 1699 nd->inode = inode;
1653 if (flags & LOOKUP_RCU) { 1700 if (flags & LOOKUP_RCU) {
1654 br_read_lock(&vfsmount_lock); 1701 lock_rcu_walk();
1655 rcu_read_lock();
1656 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1702 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1657 } else { 1703 } else {
1658 path_get(&nd->path); 1704 path_get(&nd->path);
@@ -1664,8 +1710,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1664 1710
1665 if (*name=='/') { 1711 if (*name=='/') {
1666 if (flags & LOOKUP_RCU) { 1712 if (flags & LOOKUP_RCU) {
1667 br_read_lock(&vfsmount_lock); 1713 lock_rcu_walk();
1668 rcu_read_lock();
1669 set_root_rcu(nd); 1714 set_root_rcu(nd);
1670 } else { 1715 } else {
1671 set_root(nd); 1716 set_root(nd);
@@ -1677,8 +1722,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1677 struct fs_struct *fs = current->fs; 1722 struct fs_struct *fs = current->fs;
1678 unsigned seq; 1723 unsigned seq;
1679 1724
1680 br_read_lock(&vfsmount_lock); 1725 lock_rcu_walk();
1681 rcu_read_lock();
1682 1726
1683 do { 1727 do {
1684 seq = read_seqcount_begin(&fs->seq); 1728 seq = read_seqcount_begin(&fs->seq);
@@ -1713,8 +1757,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1713 if (fput_needed) 1757 if (fput_needed)
1714 *fp = file; 1758 *fp = file;
1715 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1759 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1716 br_read_lock(&vfsmount_lock); 1760 lock_rcu_walk();
1717 rcu_read_lock();
1718 } else { 1761 } else {
1719 path_get(&file->f_path); 1762 path_get(&file->f_path);
1720 fput_light(file, fput_needed); 1763 fput_light(file, fput_needed);
@@ -1777,8 +1820,9 @@ static int path_lookupat(int dfd, const char *name,
1777 struct path link = path; 1820 struct path link = path;
1778 nd->flags |= LOOKUP_PARENT; 1821 nd->flags |= LOOKUP_PARENT;
1779 err = follow_link(&link, nd, &cookie); 1822 err = follow_link(&link, nd, &cookie);
1780 if (!err) 1823 if (err)
1781 err = lookup_last(nd, &path); 1824 break;
1825 err = lookup_last(nd, &path);
1782 put_link(nd, &link, cookie); 1826 put_link(nd, &link, cookie);
1783 } 1827 }
1784 } 1828 }
@@ -1821,9 +1865,27 @@ static int do_path_lookup(int dfd, const char *name,
1821 return retval; 1865 return retval;
1822} 1866}
1823 1867
1824int kern_path_parent(const char *name, struct nameidata *nd) 1868/* does lookup, returns the object with parent locked */
1869struct dentry *kern_path_locked(const char *name, struct path *path)
1825{ 1870{
1826 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); 1871 struct nameidata nd;
1872 struct dentry *d;
1873 int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd);
1874 if (err)
1875 return ERR_PTR(err);
1876 if (nd.last_type != LAST_NORM) {
1877 path_put(&nd.path);
1878 return ERR_PTR(-EINVAL);
1879 }
1880 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
1881 d = __lookup_hash(&nd.last, nd.path.dentry, 0);
1882 if (IS_ERR(d)) {
1883 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
1884 path_put(&nd.path);
1885 return d;
1886 }
1887 *path = nd.path;
1888 return d;
1827} 1889}
1828 1890
1829int kern_path(const char *name, unsigned int flags, struct path *path) 1891int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1866,7 +1928,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1866 */ 1928 */
1867static struct dentry *lookup_hash(struct nameidata *nd) 1929static struct dentry *lookup_hash(struct nameidata *nd)
1868{ 1930{
1869 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1931 return __lookup_hash(&nd->last, nd->path.dentry, nd->flags);
1870} 1932}
1871 1933
1872/** 1934/**
@@ -1913,7 +1975,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1913 if (err) 1975 if (err)
1914 return ERR_PTR(err); 1976 return ERR_PTR(err);
1915 1977
1916 return __lookup_hash(&this, base, NULL); 1978 return __lookup_hash(&this, base, 0);
1917} 1979}
1918 1980
1919int user_path_at_empty(int dfd, const char __user *name, unsigned flags, 1981int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
@@ -2086,10 +2148,9 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
2086} 2148}
2087 2149
2088int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 2150int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2089 struct nameidata *nd) 2151 bool want_excl)
2090{ 2152{
2091 int error = may_create(dir, dentry); 2153 int error = may_create(dir, dentry);
2092
2093 if (error) 2154 if (error)
2094 return error; 2155 return error;
2095 2156
@@ -2100,7 +2161,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2100 error = security_inode_create(dir, dentry, mode); 2161 error = security_inode_create(dir, dentry, mode);
2101 if (error) 2162 if (error)
2102 return error; 2163 return error;
2103 error = dir->i_op->create(dir, dentry, mode, nd); 2164 error = dir->i_op->create(dir, dentry, mode, want_excl);
2104 if (!error) 2165 if (!error)
2105 fsnotify_create(dir, dentry); 2166 fsnotify_create(dir, dentry);
2106 return error; 2167 return error;
@@ -2187,21 +2248,275 @@ static inline int open_to_namei_flags(int flag)
2187 return flag; 2248 return flag;
2188} 2249}
2189 2250
2251static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
2252{
2253 int error = security_path_mknod(dir, dentry, mode, 0);
2254 if (error)
2255 return error;
2256
2257 error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
2258 if (error)
2259 return error;
2260
2261 return security_inode_create(dir->dentry->d_inode, dentry, mode);
2262}
2263
2190/* 2264/*
2191 * Handle the last step of open() 2265 * Attempt to atomically look up, create and open a file from a negative
2266 * dentry.
2267 *
2268 * Returns 0 if successful. The file will have been created and attached to
2269 * @file by the filesystem calling finish_open().
2270 *
2271 * Returns 1 if the file was looked up only or didn't need creating. The
2272 * caller will need to perform the open themselves. @path will have been
2273 * updated to point to the new dentry. This may be negative.
2274 *
2275 * Returns an error code otherwise.
2276 */
2277static int atomic_open(struct nameidata *nd, struct dentry *dentry,
2278 struct path *path, struct file *file,
2279 const struct open_flags *op,
2280 bool *want_write, bool need_lookup,
2281 int *opened)
2282{
2283 struct inode *dir = nd->path.dentry->d_inode;
2284 unsigned open_flag = open_to_namei_flags(op->open_flag);
2285 umode_t mode;
2286 int error;
2287 int acc_mode;
2288 int create_error = 0;
2289 struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
2290
2291 BUG_ON(dentry->d_inode);
2292
2293 /* Don't create child dentry for a dead directory. */
2294 if (unlikely(IS_DEADDIR(dir))) {
2295 error = -ENOENT;
2296 goto out;
2297 }
2298
2299 mode = op->mode & S_IALLUGO;
2300 if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2301 mode &= ~current_umask();
2302
2303 if (open_flag & O_EXCL) {
2304 open_flag &= ~O_TRUNC;
2305 *opened |= FILE_CREATED;
2306 }
2307
2308 /*
2309 * Checking write permission is tricky, because we don't know if we are
2310 * going to actually need it: O_CREAT opens should work as long as the
2311 * file exists. But checking existence breaks atomicity. The trick is
2312 * to check access and if not granted clear O_CREAT from the flags.
2313 *
2314 * Another problem is returning the "right" error value (e.g. for an
2315 * O_EXCL open we want to return EEXIST not EROFS).
2316 */
2317 if ((open_flag & (O_CREAT | O_TRUNC)) ||
2318 (open_flag & O_ACCMODE) != O_RDONLY) {
2319 error = mnt_want_write(nd->path.mnt);
2320 if (!error) {
2321 *want_write = true;
2322 } else if (!(open_flag & O_CREAT)) {
2323 /*
2324 * No O_CREAT -> atomicity not a requirement -> fall
2325 * back to lookup + open
2326 */
2327 goto no_open;
2328 } else if (open_flag & (O_EXCL | O_TRUNC)) {
2329 /* Fall back and fail with the right error */
2330 create_error = error;
2331 goto no_open;
2332 } else {
2333 /* No side effects, safe to clear O_CREAT */
2334 create_error = error;
2335 open_flag &= ~O_CREAT;
2336 }
2337 }
2338
2339 if (open_flag & O_CREAT) {
2340 error = may_o_create(&nd->path, dentry, op->mode);
2341 if (error) {
2342 create_error = error;
2343 if (open_flag & O_EXCL)
2344 goto no_open;
2345 open_flag &= ~O_CREAT;
2346 }
2347 }
2348
2349 if (nd->flags & LOOKUP_DIRECTORY)
2350 open_flag |= O_DIRECTORY;
2351
2352 file->f_path.dentry = DENTRY_NOT_SET;
2353 file->f_path.mnt = nd->path.mnt;
2354 error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode,
2355 opened);
2356 if (error < 0) {
2357 if (create_error && error == -ENOENT)
2358 error = create_error;
2359 goto out;
2360 }
2361
2362 acc_mode = op->acc_mode;
2363 if (*opened & FILE_CREATED) {
2364 fsnotify_create(dir, dentry);
2365 acc_mode = MAY_OPEN;
2366 }
2367
2368 if (error) { /* returned 1, that is */
2369 if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
2370 error = -EIO;
2371 goto out;
2372 }
2373 if (file->f_path.dentry) {
2374 dput(dentry);
2375 dentry = file->f_path.dentry;
2376 }
2377 goto looked_up;
2378 }
2379
2380 /*
2381 * We didn't have the inode before the open, so check open permission
2382 * here.
2383 */
2384 error = may_open(&file->f_path, acc_mode, open_flag);
2385 if (error)
2386 fput(file);
2387
2388out:
2389 dput(dentry);
2390 return error;
2391
2392no_open:
2393 if (need_lookup) {
2394 dentry = lookup_real(dir, dentry, nd->flags);
2395 if (IS_ERR(dentry))
2396 return PTR_ERR(dentry);
2397
2398 if (create_error) {
2399 int open_flag = op->open_flag;
2400
2401 error = create_error;
2402 if ((open_flag & O_EXCL)) {
2403 if (!dentry->d_inode)
2404 goto out;
2405 } else if (!dentry->d_inode) {
2406 goto out;
2407 } else if ((open_flag & O_TRUNC) &&
2408 S_ISREG(dentry->d_inode->i_mode)) {
2409 goto out;
2410 }
2411 /* will fail later, go on to get the right error */
2412 }
2413 }
2414looked_up:
2415 path->dentry = dentry;
2416 path->mnt = nd->path.mnt;
2417 return 1;
2418}
2419
2420/*
2421 * Look up and maybe create and open the last component.
2422 *
2423 * Must be called with i_mutex held on parent.
2424 *
2425 * Returns 0 if the file was successfully atomically created (if necessary) and
2426 * opened. In this case the file will be returned attached to @file.
2427 *
2428 * Returns 1 if the file was not completely opened at this time, though lookups
2429 * and creations will have been performed and the dentry returned in @path will
2430 * be positive upon return if O_CREAT was specified. If O_CREAT wasn't
2431 * specified then a negative dentry may be returned.
2432 *
2433 * An error code is returned otherwise.
2434 *
2435 * FILE_CREATED will be set in @*opened if the dentry was created and will be
2436 * cleared otherwise prior to returning.
2192 */ 2437 */
2193static struct file *do_last(struct nameidata *nd, struct path *path, 2438static int lookup_open(struct nameidata *nd, struct path *path,
2194 const struct open_flags *op, const char *pathname) 2439 struct file *file,
2440 const struct open_flags *op,
2441 bool *want_write, int *opened)
2195{ 2442{
2196 struct dentry *dir = nd->path.dentry; 2443 struct dentry *dir = nd->path.dentry;
2444 struct inode *dir_inode = dir->d_inode;
2197 struct dentry *dentry; 2445 struct dentry *dentry;
2446 int error;
2447 bool need_lookup;
2448
2449 *opened &= ~FILE_CREATED;
2450 dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
2451 if (IS_ERR(dentry))
2452 return PTR_ERR(dentry);
2453
2454 /* Cached positive dentry: will open in f_op->open */
2455 if (!need_lookup && dentry->d_inode)
2456 goto out_no_open;
2457
2458 if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
2459 return atomic_open(nd, dentry, path, file, op, want_write,
2460 need_lookup, opened);
2461 }
2462
2463 if (need_lookup) {
2464 BUG_ON(dentry->d_inode);
2465
2466 dentry = lookup_real(dir_inode, dentry, nd->flags);
2467 if (IS_ERR(dentry))
2468 return PTR_ERR(dentry);
2469 }
2470
2471 /* Negative dentry, just create the file */
2472 if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
2473 umode_t mode = op->mode;
2474 if (!IS_POSIXACL(dir->d_inode))
2475 mode &= ~current_umask();
2476 /*
2477 * This write is needed to ensure that a
2478 * rw->ro transition does not occur between
2479 * the time when the file is created and when
2480 * a permanent write count is taken through
2481 * the 'struct file' in finish_open().
2482 */
2483 error = mnt_want_write(nd->path.mnt);
2484 if (error)
2485 goto out_dput;
2486 *want_write = true;
2487 *opened |= FILE_CREATED;
2488 error = security_path_mknod(&nd->path, dentry, mode, 0);
2489 if (error)
2490 goto out_dput;
2491 error = vfs_create(dir->d_inode, dentry, mode,
2492 nd->flags & LOOKUP_EXCL);
2493 if (error)
2494 goto out_dput;
2495 }
2496out_no_open:
2497 path->dentry = dentry;
2498 path->mnt = nd->path.mnt;
2499 return 1;
2500
2501out_dput:
2502 dput(dentry);
2503 return error;
2504}
2505
2506/*
2507 * Handle the last step of open()
2508 */
2509static int do_last(struct nameidata *nd, struct path *path,
2510 struct file *file, const struct open_flags *op,
2511 int *opened, const char *pathname)
2512{
2513 struct dentry *dir = nd->path.dentry;
2198 int open_flag = op->open_flag; 2514 int open_flag = op->open_flag;
2199 int will_truncate = open_flag & O_TRUNC; 2515 bool will_truncate = (open_flag & O_TRUNC) != 0;
2200 int want_write = 0; 2516 bool want_write = false;
2201 int acc_mode = op->acc_mode; 2517 int acc_mode = op->acc_mode;
2202 struct file *filp;
2203 struct inode *inode; 2518 struct inode *inode;
2204 int symlink_ok = 0; 2519 bool symlink_ok = false;
2205 struct path save_parent = { .dentry = NULL, .mnt = NULL }; 2520 struct path save_parent = { .dentry = NULL, .mnt = NULL };
2206 bool retried = false; 2521 bool retried = false;
2207 int error; 2522 int error;
@@ -2214,112 +2529,99 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2214 case LAST_DOT: 2529 case LAST_DOT:
2215 error = handle_dots(nd, nd->last_type); 2530 error = handle_dots(nd, nd->last_type);
2216 if (error) 2531 if (error)
2217 return ERR_PTR(error); 2532 return error;
2218 /* fallthrough */ 2533 /* fallthrough */
2219 case LAST_ROOT: 2534 case LAST_ROOT:
2220 error = complete_walk(nd); 2535 error = complete_walk(nd);
2221 if (error) 2536 if (error)
2222 return ERR_PTR(error); 2537 return error;
2223 audit_inode(pathname, nd->path.dentry); 2538 audit_inode(pathname, nd->path.dentry);
2224 if (open_flag & O_CREAT) { 2539 if (open_flag & O_CREAT) {
2225 error = -EISDIR; 2540 error = -EISDIR;
2226 goto exit; 2541 goto out;
2227 } 2542 }
2228 goto ok; 2543 goto finish_open;
2229 case LAST_BIND: 2544 case LAST_BIND:
2230 error = complete_walk(nd); 2545 error = complete_walk(nd);
2231 if (error) 2546 if (error)
2232 return ERR_PTR(error); 2547 return error;
2233 audit_inode(pathname, dir); 2548 audit_inode(pathname, dir);
2234 goto ok; 2549 goto finish_open;
2235 } 2550 }
2236 2551
2237 if (!(open_flag & O_CREAT)) { 2552 if (!(open_flag & O_CREAT)) {
2238 if (nd->last.name[nd->last.len]) 2553 if (nd->last.name[nd->last.len])
2239 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; 2554 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2240 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) 2555 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2241 symlink_ok = 1; 2556 symlink_ok = true;
2242 /* we _can_ be in RCU mode here */ 2557 /* we _can_ be in RCU mode here */
2243 error = lookup_fast(nd, &nd->last, path, &inode); 2558 error = lookup_fast(nd, &nd->last, path, &inode);
2244 if (unlikely(error)) { 2559 if (likely(!error))
2245 if (error < 0) 2560 goto finish_lookup;
2246 goto exit;
2247 2561
2248 error = lookup_slow(nd, &nd->last, path); 2562 if (error < 0)
2249 if (error < 0) 2563 goto out;
2250 goto exit;
2251 2564
2252 inode = path->dentry->d_inode; 2565 BUG_ON(nd->inode != dir->d_inode);
2253 } 2566 } else {
2254 goto finish_lookup; 2567 /* create side of things */
2255 } 2568 /*
2256 2569 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
2257 /* create side of things */ 2570 * has been cleared when we got to the last component we are
2258 /* 2571 * about to look up
2259 * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been 2572 */
2260 * cleared when we got to the last component we are about to look up 2573 error = complete_walk(nd);
2261 */ 2574 if (error)
2262 error = complete_walk(nd); 2575 return error;
2263 if (error)
2264 return ERR_PTR(error);
2265 2576
2266 audit_inode(pathname, dir); 2577 audit_inode(pathname, dir);
2267 error = -EISDIR; 2578 error = -EISDIR;
2268 /* trailing slashes? */ 2579 /* trailing slashes? */
2269 if (nd->last.name[nd->last.len]) 2580 if (nd->last.name[nd->last.len])
2270 goto exit; 2581 goto out;
2582 }
2271 2583
2272retry_lookup: 2584retry_lookup:
2273 mutex_lock(&dir->d_inode->i_mutex); 2585 mutex_lock(&dir->d_inode->i_mutex);
2586 error = lookup_open(nd, path, file, op, &want_write, opened);
2587 mutex_unlock(&dir->d_inode->i_mutex);
2274 2588
2275 dentry = lookup_hash(nd); 2589 if (error <= 0) {
2276 error = PTR_ERR(dentry); 2590 if (error)
2277 if (IS_ERR(dentry)) { 2591 goto out;
2278 mutex_unlock(&dir->d_inode->i_mutex);
2279 goto exit;
2280 }
2281 2592
2282 path->dentry = dentry; 2593 if ((*opened & FILE_CREATED) ||
2283 path->mnt = nd->path.mnt; 2594 !S_ISREG(file->f_path.dentry->d_inode->i_mode))
2595 will_truncate = false;
2284 2596
2285 /* Negative dentry, just create the file */ 2597 audit_inode(pathname, file->f_path.dentry);
2286 if (!dentry->d_inode) { 2598 goto opened;
2287 umode_t mode = op->mode; 2599 }
2288 if (!IS_POSIXACL(dir->d_inode)) 2600
2289 mode &= ~current_umask(); 2601 if (*opened & FILE_CREATED) {
2290 /*
2291 * This write is needed to ensure that a
2292 * rw->ro transition does not occur between
2293 * the time when the file is created and when
2294 * a permanent write count is taken through
2295 * the 'struct file' in nameidata_to_filp().
2296 */
2297 error = mnt_want_write(nd->path.mnt);
2298 if (error)
2299 goto exit_mutex_unlock;
2300 want_write = 1;
2301 /* Don't check for write permission, don't truncate */ 2602 /* Don't check for write permission, don't truncate */
2302 open_flag &= ~O_TRUNC; 2603 open_flag &= ~O_TRUNC;
2303 will_truncate = 0; 2604 will_truncate = false;
2304 acc_mode = MAY_OPEN; 2605 acc_mode = MAY_OPEN;
2305 error = security_path_mknod(&nd->path, dentry, mode, 0); 2606 path_to_nameidata(path, nd);
2306 if (error) 2607 goto finish_open_created;
2307 goto exit_mutex_unlock;
2308 error = vfs_create(dir->d_inode, dentry, mode, nd);
2309 if (error)
2310 goto exit_mutex_unlock;
2311 mutex_unlock(&dir->d_inode->i_mutex);
2312 dput(nd->path.dentry);
2313 nd->path.dentry = dentry;
2314 goto common;
2315 } 2608 }
2316 2609
2317 /* 2610 /*
2318 * It already exists. 2611 * It already exists.
2319 */ 2612 */
2320 mutex_unlock(&dir->d_inode->i_mutex);
2321 audit_inode(pathname, path->dentry); 2613 audit_inode(pathname, path->dentry);
2322 2614
2615 /*
2616 * If atomic_open() acquired write access it is dropped now due to
2617 * possible mount and symlink following (this might be optimized away if
2618 * necessary...)
2619 */
2620 if (want_write) {
2621 mnt_drop_write(nd->path.mnt);
2622 want_write = false;
2623 }
2624
2323 error = -EEXIST; 2625 error = -EEXIST;
2324 if (open_flag & O_EXCL) 2626 if (open_flag & O_EXCL)
2325 goto exit_dput; 2627 goto exit_dput;
@@ -2338,18 +2640,18 @@ finish_lookup:
2338 error = -ENOENT; 2640 error = -ENOENT;
2339 if (!inode) { 2641 if (!inode) {
2340 path_to_nameidata(path, nd); 2642 path_to_nameidata(path, nd);
2341 goto exit; 2643 goto out;
2342 } 2644 }
2343 2645
2344 if (should_follow_link(inode, !symlink_ok)) { 2646 if (should_follow_link(inode, !symlink_ok)) {
2345 if (nd->flags & LOOKUP_RCU) { 2647 if (nd->flags & LOOKUP_RCU) {
2346 if (unlikely(unlazy_walk(nd, path->dentry))) { 2648 if (unlikely(unlazy_walk(nd, path->dentry))) {
2347 error = -ECHILD; 2649 error = -ECHILD;
2348 goto exit; 2650 goto out;
2349 } 2651 }
2350 } 2652 }
2351 BUG_ON(inode != path->dentry->d_inode); 2653 BUG_ON(inode != path->dentry->d_inode);
2352 return NULL; 2654 return 1;
2353 } 2655 }
2354 2656
2355 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { 2657 if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
@@ -2365,119 +2667,122 @@ finish_lookup:
2365 error = complete_walk(nd); 2667 error = complete_walk(nd);
2366 if (error) { 2668 if (error) {
2367 path_put(&save_parent); 2669 path_put(&save_parent);
2368 return ERR_PTR(error); 2670 return error;
2369 } 2671 }
2370 error = -EISDIR; 2672 error = -EISDIR;
2371 if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) 2673 if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
2372 goto exit; 2674 goto out;
2373 error = -ENOTDIR; 2675 error = -ENOTDIR;
2374 if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) 2676 if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup)
2375 goto exit; 2677 goto out;
2376 audit_inode(pathname, nd->path.dentry); 2678 audit_inode(pathname, nd->path.dentry);
2377ok: 2679finish_open:
2378 if (!S_ISREG(nd->inode->i_mode)) 2680 if (!S_ISREG(nd->inode->i_mode))
2379 will_truncate = 0; 2681 will_truncate = false;
2380 2682
2381 if (will_truncate) { 2683 if (will_truncate) {
2382 error = mnt_want_write(nd->path.mnt); 2684 error = mnt_want_write(nd->path.mnt);
2383 if (error) 2685 if (error)
2384 goto exit; 2686 goto out;
2385 want_write = 1; 2687 want_write = true;
2386 } 2688 }
2387common: 2689finish_open_created:
2388 error = may_open(&nd->path, acc_mode, open_flag); 2690 error = may_open(&nd->path, acc_mode, open_flag);
2389 if (error) 2691 if (error)
2390 goto exit; 2692 goto out;
2391 filp = nameidata_to_filp(nd); 2693 file->f_path.mnt = nd->path.mnt;
2392 if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { 2694 error = finish_open(file, nd->path.dentry, NULL, opened);
2393 BUG_ON(save_parent.dentry != dir); 2695 if (error) {
2394 path_put(&nd->path); 2696 if (error == -EOPENSTALE)
2395 nd->path = save_parent; 2697 goto stale_open;
2396 nd->inode = dir->d_inode; 2698 goto out;
2397 save_parent.mnt = NULL;
2398 save_parent.dentry = NULL;
2399 if (want_write) {
2400 mnt_drop_write(nd->path.mnt);
2401 want_write = 0;
2402 }
2403 retried = true;
2404 goto retry_lookup;
2405 }
2406 if (!IS_ERR(filp)) {
2407 error = ima_file_check(filp, op->acc_mode);
2408 if (error) {
2409 fput(filp);
2410 filp = ERR_PTR(error);
2411 }
2412 } 2699 }
2413 if (!IS_ERR(filp)) { 2700opened:
2414 if (will_truncate) { 2701 error = open_check_o_direct(file);
2415 error = handle_truncate(filp); 2702 if (error)
2416 if (error) { 2703 goto exit_fput;
2417 fput(filp); 2704 error = ima_file_check(file, op->acc_mode);
2418 filp = ERR_PTR(error); 2705 if (error)
2419 } 2706 goto exit_fput;
2420 } 2707
2708 if (will_truncate) {
2709 error = handle_truncate(file);
2710 if (error)
2711 goto exit_fput;
2421 } 2712 }
2422out: 2713out:
2423 if (want_write) 2714 if (want_write)
2424 mnt_drop_write(nd->path.mnt); 2715 mnt_drop_write(nd->path.mnt);
2425 path_put(&save_parent); 2716 path_put(&save_parent);
2426 terminate_walk(nd); 2717 terminate_walk(nd);
2427 return filp; 2718 return error;
2428 2719
2429exit_mutex_unlock:
2430 mutex_unlock(&dir->d_inode->i_mutex);
2431exit_dput: 2720exit_dput:
2432 path_put_conditional(path, nd); 2721 path_put_conditional(path, nd);
2433exit:
2434 filp = ERR_PTR(error);
2435 goto out; 2722 goto out;
2723exit_fput:
2724 fput(file);
2725 goto out;
2726
2727stale_open:
2728 /* If no saved parent or already retried then can't retry */
2729 if (!save_parent.dentry || retried)
2730 goto out;
2731
2732 BUG_ON(save_parent.dentry != dir);
2733 path_put(&nd->path);
2734 nd->path = save_parent;
2735 nd->inode = dir->d_inode;
2736 save_parent.mnt = NULL;
2737 save_parent.dentry = NULL;
2738 if (want_write) {
2739 mnt_drop_write(nd->path.mnt);
2740 want_write = false;
2741 }
2742 retried = true;
2743 goto retry_lookup;
2436} 2744}
2437 2745
2438static struct file *path_openat(int dfd, const char *pathname, 2746static struct file *path_openat(int dfd, const char *pathname,
2439 struct nameidata *nd, const struct open_flags *op, int flags) 2747 struct nameidata *nd, const struct open_flags *op, int flags)
2440{ 2748{
2441 struct file *base = NULL; 2749 struct file *base = NULL;
2442 struct file *filp; 2750 struct file *file;
2443 struct path path; 2751 struct path path;
2752 int opened = 0;
2444 int error; 2753 int error;
2445 2754
2446 filp = get_empty_filp(); 2755 file = get_empty_filp();
2447 if (!filp) 2756 if (!file)
2448 return ERR_PTR(-ENFILE); 2757 return ERR_PTR(-ENFILE);
2449 2758
2450 filp->f_flags = op->open_flag; 2759 file->f_flags = op->open_flag;
2451 nd->intent.open.file = filp;
2452 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2453 nd->intent.open.create_mode = op->mode;
2454 2760
2455 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); 2761 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2456 if (unlikely(error)) 2762 if (unlikely(error))
2457 goto out_filp; 2763 goto out;
2458 2764
2459 current->total_link_count = 0; 2765 current->total_link_count = 0;
2460 error = link_path_walk(pathname, nd); 2766 error = link_path_walk(pathname, nd);
2461 if (unlikely(error)) 2767 if (unlikely(error))
2462 goto out_filp; 2768 goto out;
2463 2769
2464 filp = do_last(nd, &path, op, pathname); 2770 error = do_last(nd, &path, file, op, &opened, pathname);
2465 while (unlikely(!filp)) { /* trailing symlink */ 2771 while (unlikely(error > 0)) { /* trailing symlink */
2466 struct path link = path; 2772 struct path link = path;
2467 void *cookie; 2773 void *cookie;
2468 if (!(nd->flags & LOOKUP_FOLLOW)) { 2774 if (!(nd->flags & LOOKUP_FOLLOW)) {
2469 path_put_conditional(&path, nd); 2775 path_put_conditional(&path, nd);
2470 path_put(&nd->path); 2776 path_put(&nd->path);
2471 filp = ERR_PTR(-ELOOP); 2777 error = -ELOOP;
2472 break; 2778 break;
2473 } 2779 }
2474 nd->flags |= LOOKUP_PARENT; 2780 nd->flags |= LOOKUP_PARENT;
2475 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); 2781 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2476 error = follow_link(&link, nd, &cookie); 2782 error = follow_link(&link, nd, &cookie);
2477 if (unlikely(error)) 2783 if (unlikely(error))
2478 filp = ERR_PTR(error); 2784 break;
2479 else 2785 error = do_last(nd, &path, file, op, &opened, pathname);
2480 filp = do_last(nd, &path, op, pathname);
2481 put_link(nd, &link, cookie); 2786 put_link(nd, &link, cookie);
2482 } 2787 }
2483out: 2788out:
@@ -2485,18 +2790,20 @@ out:
2485 path_put(&nd->root); 2790 path_put(&nd->root);
2486 if (base) 2791 if (base)
2487 fput(base); 2792 fput(base);
2488 release_open_intent(nd); 2793 if (!(opened & FILE_OPENED)) {
2489 if (filp == ERR_PTR(-EOPENSTALE)) { 2794 BUG_ON(!error);
2490 if (flags & LOOKUP_RCU) 2795 put_filp(file);
2491 filp = ERR_PTR(-ECHILD);
2492 else
2493 filp = ERR_PTR(-ESTALE);
2494 } 2796 }
2495 return filp; 2797 if (unlikely(error)) {
2496 2798 if (error == -EOPENSTALE) {
2497out_filp: 2799 if (flags & LOOKUP_RCU)
2498 filp = ERR_PTR(error); 2800 error = -ECHILD;
2499 goto out; 2801 else
2802 error = -ESTALE;
2803 }
2804 file = ERR_PTR(error);
2805 }
2806 return file;
2500} 2807}
2501 2808
2502struct file *do_filp_open(int dfd, const char *pathname, 2809struct file *do_filp_open(int dfd, const char *pathname,
@@ -2551,7 +2858,6 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
2551 goto out; 2858 goto out;
2552 nd.flags &= ~LOOKUP_PARENT; 2859 nd.flags &= ~LOOKUP_PARENT;
2553 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; 2860 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
2554 nd.intent.open.flags = O_EXCL;
2555 2861
2556 /* 2862 /*
2557 * Do the final lookup. 2863 * Do the final lookup.
@@ -2670,7 +2976,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2670 goto out_drop_write; 2976 goto out_drop_write;
2671 switch (mode & S_IFMT) { 2977 switch (mode & S_IFMT) {
2672 case 0: case S_IFREG: 2978 case 0: case S_IFREG:
2673 error = vfs_create(path.dentry->d_inode,dentry,mode,NULL); 2979 error = vfs_create(path.dentry->d_inode,dentry,mode,true);
2674 break; 2980 break;
2675 case S_IFCHR: case S_IFBLK: 2981 case S_IFCHR: case S_IFBLK:
2676 error = vfs_mknod(path.dentry->d_inode,dentry,mode, 2982 error = vfs_mknod(path.dentry->d_inode,dentry,mode,
diff --git a/fs/namespace.c b/fs/namespace.c
index 1e4a5fe3d7b7..c53d3381b0d0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -515,8 +515,20 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
515} 515}
516 516
517/* 517/*
518 * lookup_mnt increments the ref count before returning 518 * lookup_mnt - Return the first child mount mounted at path
519 * the vfsmount struct. 519 *
520 * "First" means first mounted chronologically. If you create the
521 * following mounts:
522 *
523 * mount /dev/sda1 /mnt
524 * mount /dev/sda2 /mnt
525 * mount /dev/sda3 /mnt
526 *
527 * Then lookup_mnt() on the base /mnt dentry in the root mount will
528 * return successively the root dentry and vfsmount of /dev/sda1, then
529 * /dev/sda2, then /dev/sda3, then NULL.
530 *
531 * lookup_mnt takes a reference to the found vfsmount.
520 */ 532 */
521struct vfsmount *lookup_mnt(struct path *path) 533struct vfsmount *lookup_mnt(struct path *path)
522{ 534{
@@ -621,21 +633,6 @@ static void attach_mnt(struct mount *mnt, struct path *path)
621 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); 633 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
622} 634}
623 635
624static inline void __mnt_make_longterm(struct mount *mnt)
625{
626#ifdef CONFIG_SMP
627 atomic_inc(&mnt->mnt_longterm);
628#endif
629}
630
631/* needs vfsmount lock for write */
632static inline void __mnt_make_shortterm(struct mount *mnt)
633{
634#ifdef CONFIG_SMP
635 atomic_dec(&mnt->mnt_longterm);
636#endif
637}
638
639/* 636/*
640 * vfsmount lock must be held for write 637 * vfsmount lock must be held for write
641 */ 638 */
@@ -649,10 +646,8 @@ static void commit_tree(struct mount *mnt)
649 BUG_ON(parent == mnt); 646 BUG_ON(parent == mnt);
650 647
651 list_add_tail(&head, &mnt->mnt_list); 648 list_add_tail(&head, &mnt->mnt_list);
652 list_for_each_entry(m, &head, mnt_list) { 649 list_for_each_entry(m, &head, mnt_list)
653 m->mnt_ns = n; 650 m->mnt_ns = n;
654 __mnt_make_longterm(m);
655 }
656 651
657 list_splice(&head, n->list.prev); 652 list_splice(&head, n->list.prev);
658 653
@@ -725,56 +720,60 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
725 int flag) 720 int flag)
726{ 721{
727 struct super_block *sb = old->mnt.mnt_sb; 722 struct super_block *sb = old->mnt.mnt_sb;
728 struct mount *mnt = alloc_vfsmnt(old->mnt_devname); 723 struct mount *mnt;
724 int err;
729 725
730 if (mnt) { 726 mnt = alloc_vfsmnt(old->mnt_devname);
731 if (flag & (CL_SLAVE | CL_PRIVATE)) 727 if (!mnt)
732 mnt->mnt_group_id = 0; /* not a peer of original */ 728 return ERR_PTR(-ENOMEM);
733 else
734 mnt->mnt_group_id = old->mnt_group_id;
735
736 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
737 int err = mnt_alloc_group_id(mnt);
738 if (err)
739 goto out_free;
740 }
741 729
742 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; 730 if (flag & (CL_SLAVE | CL_PRIVATE))
743 atomic_inc(&sb->s_active); 731 mnt->mnt_group_id = 0; /* not a peer of original */
744 mnt->mnt.mnt_sb = sb; 732 else
745 mnt->mnt.mnt_root = dget(root); 733 mnt->mnt_group_id = old->mnt_group_id;
746 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
747 mnt->mnt_parent = mnt;
748 br_write_lock(&vfsmount_lock);
749 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
750 br_write_unlock(&vfsmount_lock);
751 734
752 if (flag & CL_SLAVE) { 735 if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
753 list_add(&mnt->mnt_slave, &old->mnt_slave_list); 736 err = mnt_alloc_group_id(mnt);
754 mnt->mnt_master = old; 737 if (err)
755 CLEAR_MNT_SHARED(mnt); 738 goto out_free;
756 } else if (!(flag & CL_PRIVATE)) {
757 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
758 list_add(&mnt->mnt_share, &old->mnt_share);
759 if (IS_MNT_SLAVE(old))
760 list_add(&mnt->mnt_slave, &old->mnt_slave);
761 mnt->mnt_master = old->mnt_master;
762 }
763 if (flag & CL_MAKE_SHARED)
764 set_mnt_shared(mnt);
765
766 /* stick the duplicate mount on the same expiry list
767 * as the original if that was on one */
768 if (flag & CL_EXPIRE) {
769 if (!list_empty(&old->mnt_expire))
770 list_add(&mnt->mnt_expire, &old->mnt_expire);
771 }
772 } 739 }
740
741 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
742 atomic_inc(&sb->s_active);
743 mnt->mnt.mnt_sb = sb;
744 mnt->mnt.mnt_root = dget(root);
745 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
746 mnt->mnt_parent = mnt;
747 br_write_lock(&vfsmount_lock);
748 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
749 br_write_unlock(&vfsmount_lock);
750
751 if (flag & CL_SLAVE) {
752 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
753 mnt->mnt_master = old;
754 CLEAR_MNT_SHARED(mnt);
755 } else if (!(flag & CL_PRIVATE)) {
756 if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
757 list_add(&mnt->mnt_share, &old->mnt_share);
758 if (IS_MNT_SLAVE(old))
759 list_add(&mnt->mnt_slave, &old->mnt_slave);
760 mnt->mnt_master = old->mnt_master;
761 }
762 if (flag & CL_MAKE_SHARED)
763 set_mnt_shared(mnt);
764
765 /* stick the duplicate mount on the same expiry list
766 * as the original if that was on one */
767 if (flag & CL_EXPIRE) {
768 if (!list_empty(&old->mnt_expire))
769 list_add(&mnt->mnt_expire, &old->mnt_expire);
770 }
771
773 return mnt; 772 return mnt;
774 773
775 out_free: 774 out_free:
776 free_vfsmnt(mnt); 775 free_vfsmnt(mnt);
777 return NULL; 776 return ERR_PTR(err);
778} 777}
779 778
780static inline void mntfree(struct mount *mnt) 779static inline void mntfree(struct mount *mnt)
@@ -804,7 +803,8 @@ static void mntput_no_expire(struct mount *mnt)
804put_again: 803put_again:
805#ifdef CONFIG_SMP 804#ifdef CONFIG_SMP
806 br_read_lock(&vfsmount_lock); 805 br_read_lock(&vfsmount_lock);
807 if (likely(atomic_read(&mnt->mnt_longterm))) { 806 if (likely(mnt->mnt_ns)) {
807 /* shouldn't be the last one */
808 mnt_add_count(mnt, -1); 808 mnt_add_count(mnt, -1);
809 br_read_unlock(&vfsmount_lock); 809 br_read_unlock(&vfsmount_lock);
810 return; 810 return;
@@ -939,7 +939,7 @@ EXPORT_SYMBOL(replace_mount_options);
939/* iterator; we want it to have access to namespace_sem, thus here... */ 939/* iterator; we want it to have access to namespace_sem, thus here... */
940static void *m_start(struct seq_file *m, loff_t *pos) 940static void *m_start(struct seq_file *m, loff_t *pos)
941{ 941{
942 struct proc_mounts *p = container_of(m, struct proc_mounts, m); 942 struct proc_mounts *p = proc_mounts(m);
943 943
944 down_read(&namespace_sem); 944 down_read(&namespace_sem);
945 return seq_list_start(&p->ns->list, *pos); 945 return seq_list_start(&p->ns->list, *pos);
@@ -947,7 +947,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
947 947
948static void *m_next(struct seq_file *m, void *v, loff_t *pos) 948static void *m_next(struct seq_file *m, void *v, loff_t *pos)
949{ 949{
950 struct proc_mounts *p = container_of(m, struct proc_mounts, m); 950 struct proc_mounts *p = proc_mounts(m);
951 951
952 return seq_list_next(v, &p->ns->list, pos); 952 return seq_list_next(v, &p->ns->list, pos);
953} 953}
@@ -959,7 +959,7 @@ static void m_stop(struct seq_file *m, void *v)
959 959
960static int m_show(struct seq_file *m, void *v) 960static int m_show(struct seq_file *m, void *v)
961{ 961{
962 struct proc_mounts *p = container_of(m, struct proc_mounts, m); 962 struct proc_mounts *p = proc_mounts(m);
963 struct mount *r = list_entry(v, struct mount, mnt_list); 963 struct mount *r = list_entry(v, struct mount, mnt_list);
964 return p->show(m, &r->mnt); 964 return p->show(m, &r->mnt);
965} 965}
@@ -1074,8 +1074,6 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
1074 list_del_init(&p->mnt_expire); 1074 list_del_init(&p->mnt_expire);
1075 list_del_init(&p->mnt_list); 1075 list_del_init(&p->mnt_list);
1076 __touch_mnt_namespace(p->mnt_ns); 1076 __touch_mnt_namespace(p->mnt_ns);
1077 if (p->mnt_ns)
1078 __mnt_make_shortterm(p);
1079 p->mnt_ns = NULL; 1077 p->mnt_ns = NULL;
1080 list_del_init(&p->mnt_child); 1078 list_del_init(&p->mnt_child);
1081 if (mnt_has_parent(p)) { 1079 if (mnt_has_parent(p)) {
@@ -1260,11 +1258,12 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1260 struct path path; 1258 struct path path;
1261 1259
1262 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1260 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
1263 return NULL; 1261 return ERR_PTR(-EINVAL);
1264 1262
1265 res = q = clone_mnt(mnt, dentry, flag); 1263 res = q = clone_mnt(mnt, dentry, flag);
1266 if (!q) 1264 if (IS_ERR(q))
1267 goto Enomem; 1265 return q;
1266
1268 q->mnt_mountpoint = mnt->mnt_mountpoint; 1267 q->mnt_mountpoint = mnt->mnt_mountpoint;
1269 1268
1270 p = mnt; 1269 p = mnt;
@@ -1286,8 +1285,8 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1286 path.mnt = &q->mnt; 1285 path.mnt = &q->mnt;
1287 path.dentry = p->mnt_mountpoint; 1286 path.dentry = p->mnt_mountpoint;
1288 q = clone_mnt(p, p->mnt.mnt_root, flag); 1287 q = clone_mnt(p, p->mnt.mnt_root, flag);
1289 if (!q) 1288 if (IS_ERR(q))
1290 goto Enomem; 1289 goto out;
1291 br_write_lock(&vfsmount_lock); 1290 br_write_lock(&vfsmount_lock);
1292 list_add_tail(&q->mnt_list, &res->mnt_list); 1291 list_add_tail(&q->mnt_list, &res->mnt_list);
1293 attach_mnt(q, &path); 1292 attach_mnt(q, &path);
@@ -1295,7 +1294,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1295 } 1294 }
1296 } 1295 }
1297 return res; 1296 return res;
1298Enomem: 1297out:
1299 if (res) { 1298 if (res) {
1300 LIST_HEAD(umount_list); 1299 LIST_HEAD(umount_list);
1301 br_write_lock(&vfsmount_lock); 1300 br_write_lock(&vfsmount_lock);
@@ -1303,9 +1302,11 @@ Enomem:
1303 br_write_unlock(&vfsmount_lock); 1302 br_write_unlock(&vfsmount_lock);
1304 release_mounts(&umount_list); 1303 release_mounts(&umount_list);
1305 } 1304 }
1306 return NULL; 1305 return q;
1307} 1306}
1308 1307
1308/* Caller should check returned pointer for errors */
1309
1309struct vfsmount *collect_mounts(struct path *path) 1310struct vfsmount *collect_mounts(struct path *path)
1310{ 1311{
1311 struct mount *tree; 1312 struct mount *tree;
@@ -1313,7 +1314,9 @@ struct vfsmount *collect_mounts(struct path *path)
1313 tree = copy_tree(real_mount(path->mnt), path->dentry, 1314 tree = copy_tree(real_mount(path->mnt), path->dentry,
1314 CL_COPY_ALL | CL_PRIVATE); 1315 CL_COPY_ALL | CL_PRIVATE);
1315 up_write(&namespace_sem); 1316 up_write(&namespace_sem);
1316 return tree ? &tree->mnt : NULL; 1317 if (IS_ERR(tree))
1318 return NULL;
1319 return &tree->mnt;
1317} 1320}
1318 1321
1319void drop_collected_mounts(struct vfsmount *mnt) 1322void drop_collected_mounts(struct vfsmount *mnt)
@@ -1608,14 +1611,15 @@ static int do_loopback(struct path *path, char *old_name,
1608 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) 1611 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
1609 goto out2; 1612 goto out2;
1610 1613
1611 err = -ENOMEM;
1612 if (recurse) 1614 if (recurse)
1613 mnt = copy_tree(old, old_path.dentry, 0); 1615 mnt = copy_tree(old, old_path.dentry, 0);
1614 else 1616 else
1615 mnt = clone_mnt(old, old_path.dentry, 0); 1617 mnt = clone_mnt(old, old_path.dentry, 0);
1616 1618
1617 if (!mnt) 1619 if (IS_ERR(mnt)) {
1618 goto out2; 1620 err = PTR_ERR(mnt);
1621 goto out;
1622 }
1619 1623
1620 err = graft_tree(mnt, path); 1624 err = graft_tree(mnt, path);
1621 if (err) { 1625 if (err) {
@@ -2209,23 +2213,6 @@ static struct mnt_namespace *alloc_mnt_ns(void)
2209 return new_ns; 2213 return new_ns;
2210} 2214}
2211 2215
2212void mnt_make_longterm(struct vfsmount *mnt)
2213{
2214 __mnt_make_longterm(real_mount(mnt));
2215}
2216
2217void mnt_make_shortterm(struct vfsmount *m)
2218{
2219#ifdef CONFIG_SMP
2220 struct mount *mnt = real_mount(m);
2221 if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
2222 return;
2223 br_write_lock(&vfsmount_lock);
2224 atomic_dec(&mnt->mnt_longterm);
2225 br_write_unlock(&vfsmount_lock);
2226#endif
2227}
2228
2229/* 2216/*
2230 * Allocate a new namespace structure and populate it with contents 2217 * Allocate a new namespace structure and populate it with contents
2231 * copied from the namespace of the passed in task structure. 2218 * copied from the namespace of the passed in task structure.
@@ -2246,10 +2233,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2246 down_write(&namespace_sem); 2233 down_write(&namespace_sem);
2247 /* First pass: copy the tree topology */ 2234 /* First pass: copy the tree topology */
2248 new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); 2235 new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
2249 if (!new) { 2236 if (IS_ERR(new)) {
2250 up_write(&namespace_sem); 2237 up_write(&namespace_sem);
2251 kfree(new_ns); 2238 kfree(new_ns);
2252 return ERR_PTR(-ENOMEM); 2239 return ERR_CAST(new);
2253 } 2240 }
2254 new_ns->root = new; 2241 new_ns->root = new;
2255 br_write_lock(&vfsmount_lock); 2242 br_write_lock(&vfsmount_lock);
@@ -2265,18 +2252,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2265 q = new; 2252 q = new;
2266 while (p) { 2253 while (p) {
2267 q->mnt_ns = new_ns; 2254 q->mnt_ns = new_ns;
2268 __mnt_make_longterm(q);
2269 if (fs) { 2255 if (fs) {
2270 if (&p->mnt == fs->root.mnt) { 2256 if (&p->mnt == fs->root.mnt) {
2271 fs->root.mnt = mntget(&q->mnt); 2257 fs->root.mnt = mntget(&q->mnt);
2272 __mnt_make_longterm(q);
2273 mnt_make_shortterm(&p->mnt);
2274 rootmnt = &p->mnt; 2258 rootmnt = &p->mnt;
2275 } 2259 }
2276 if (&p->mnt == fs->pwd.mnt) { 2260 if (&p->mnt == fs->pwd.mnt) {
2277 fs->pwd.mnt = mntget(&q->mnt); 2261 fs->pwd.mnt = mntget(&q->mnt);
2278 __mnt_make_longterm(q);
2279 mnt_make_shortterm(&p->mnt);
2280 pwdmnt = &p->mnt; 2262 pwdmnt = &p->mnt;
2281 } 2263 }
2282 } 2264 }
@@ -2320,7 +2302,6 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2320 if (!IS_ERR(new_ns)) { 2302 if (!IS_ERR(new_ns)) {
2321 struct mount *mnt = real_mount(m); 2303 struct mount *mnt = real_mount(m);
2322 mnt->mnt_ns = new_ns; 2304 mnt->mnt_ns = new_ns;
2323 __mnt_make_longterm(mnt);
2324 new_ns->root = mnt; 2305 new_ns->root = mnt;
2325 list_add(&new_ns->list, &mnt->mnt_list); 2306 list_add(&new_ns->list, &mnt->mnt_list);
2326 } else { 2307 } else {
@@ -2615,7 +2596,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
2615 * it is a longterm mount, don't release mnt until 2596 * it is a longterm mount, don't release mnt until
2616 * we unmount before file sys is unregistered 2597 * we unmount before file sys is unregistered
2617 */ 2598 */
2618 mnt_make_longterm(mnt); 2599 real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
2619 } 2600 }
2620 return mnt; 2601 return mnt;
2621} 2602}
@@ -2625,7 +2606,9 @@ void kern_unmount(struct vfsmount *mnt)
2625{ 2606{
2626 /* release long term mount so mount point can be released */ 2607 /* release long term mount so mount point can be released */
2627 if (!IS_ERR_OR_NULL(mnt)) { 2608 if (!IS_ERR_OR_NULL(mnt)) {
2628 mnt_make_shortterm(mnt); 2609 br_write_lock(&vfsmount_lock);
2610 real_mount(mnt)->mnt_ns = NULL;
2611 br_write_unlock(&vfsmount_lock);
2629 mntput(mnt); 2612 mntput(mnt);
2630 } 2613 }
2631} 2614}
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aeed93a6bde0..4117e7b377bb 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -30,8 +30,8 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
30 30
31static int ncp_readdir(struct file *, void *, filldir_t); 31static int ncp_readdir(struct file *, void *, filldir_t);
32 32
33static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *); 33static int ncp_create(struct inode *, struct dentry *, umode_t, bool);
34static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); 34static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int);
35static int ncp_unlink(struct inode *, struct dentry *); 35static int ncp_unlink(struct inode *, struct dentry *);
36static int ncp_mkdir(struct inode *, struct dentry *, umode_t); 36static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
37static int ncp_rmdir(struct inode *, struct dentry *); 37static int ncp_rmdir(struct inode *, struct dentry *);
@@ -72,7 +72,7 @@ const struct inode_operations ncp_dir_inode_operations =
72/* 72/*
73 * Dentry operations routines 73 * Dentry operations routines
74 */ 74 */
75static int ncp_lookup_validate(struct dentry *, struct nameidata *); 75static int ncp_lookup_validate(struct dentry *, unsigned int);
76static int ncp_hash_dentry(const struct dentry *, const struct inode *, 76static int ncp_hash_dentry(const struct dentry *, const struct inode *,
77 struct qstr *); 77 struct qstr *);
78static int ncp_compare_dentry(const struct dentry *, const struct inode *, 78static int ncp_compare_dentry(const struct dentry *, const struct inode *,
@@ -290,7 +290,7 @@ leave_me:;
290 290
291 291
292static int 292static int
293ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) 293ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
294{ 294{
295 struct ncp_server *server; 295 struct ncp_server *server;
296 struct dentry *parent; 296 struct dentry *parent;
@@ -302,7 +302,7 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd)
302 if (dentry == dentry->d_sb->s_root) 302 if (dentry == dentry->d_sb->s_root)
303 return 1; 303 return 1;
304 304
305 if (nd->flags & LOOKUP_RCU) 305 if (flags & LOOKUP_RCU)
306 return -ECHILD; 306 return -ECHILD;
307 307
308 parent = dget_parent(dentry); 308 parent = dget_parent(dentry);
@@ -836,7 +836,7 @@ out:
836 return result; 836 return result;
837} 837}
838 838
839static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 839static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
840{ 840{
841 struct ncp_server *server = NCP_SERVER(dir); 841 struct ncp_server *server = NCP_SERVER(dir);
842 struct inode *inode = NULL; 842 struct inode *inode = NULL;
@@ -980,7 +980,7 @@ out:
980} 980}
981 981
982static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode, 982static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
983 struct nameidata *nd) 983 bool excl)
984{ 984{
985 return ncp_create_new(dir, dentry, mode, 0, 0); 985 return ncp_create_new(dir, dentry, mode, 0, 0);
986} 986}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f430057ff3b3..a6b1c7fb8232 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -46,8 +46,8 @@
46static int nfs_opendir(struct inode *, struct file *); 46static int nfs_opendir(struct inode *, struct file *);
47static int nfs_closedir(struct inode *, struct file *); 47static int nfs_closedir(struct inode *, struct file *);
48static int nfs_readdir(struct file *, void *, filldir_t); 48static int nfs_readdir(struct file *, void *, filldir_t);
49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); 49static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
50static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); 50static int nfs_create(struct inode *, struct dentry *, umode_t, bool);
51static int nfs_mkdir(struct inode *, struct dentry *, umode_t); 51static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
52static int nfs_rmdir(struct inode *, struct dentry *); 52static int nfs_rmdir(struct inode *, struct dentry *);
53static int nfs_unlink(struct inode *, struct dentry *); 53static int nfs_unlink(struct inode *, struct dentry *);
@@ -111,11 +111,13 @@ const struct inode_operations nfs3_dir_inode_operations = {
111 111
112#ifdef CONFIG_NFS_V4 112#ifdef CONFIG_NFS_V4
113 113
114static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); 114static int nfs_atomic_open(struct inode *, struct dentry *,
115static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd); 115 struct file *, unsigned, umode_t,
116 int *);
116const struct inode_operations nfs4_dir_inode_operations = { 117const struct inode_operations nfs4_dir_inode_operations = {
117 .create = nfs_open_create, 118 .create = nfs_create,
118 .lookup = nfs_atomic_lookup, 119 .lookup = nfs_lookup,
120 .atomic_open = nfs_atomic_open,
119 .link = nfs_link, 121 .link = nfs_link,
120 .unlink = nfs_unlink, 122 .unlink = nfs_unlink,
121 .symlink = nfs_symlink, 123 .symlink = nfs_symlink,
@@ -1029,27 +1031,14 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
1029} 1031}
1030 1032
1031/* 1033/*
1032 * Return the intent data that applies to this particular path component
1033 *
1034 * Note that the current set of intents only apply to the very last
1035 * component of the path and none of them is set before that last
1036 * component.
1037 */
1038static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
1039 unsigned int mask)
1040{
1041 return nd->flags & mask;
1042}
1043
1044/*
1045 * Use intent information to check whether or not we're going to do 1034 * Use intent information to check whether or not we're going to do
1046 * an O_EXCL create using this path component. 1035 * an O_EXCL create using this path component.
1047 */ 1036 */
1048static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) 1037static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
1049{ 1038{
1050 if (NFS_PROTO(dir)->version == 2) 1039 if (NFS_PROTO(dir)->version == 2)
1051 return 0; 1040 return 0;
1052 return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); 1041 return flags & LOOKUP_EXCL;
1053} 1042}
1054 1043
1055/* 1044/*
@@ -1061,25 +1050,20 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
1061 * 1050 *
1062 */ 1051 */
1063static inline 1052static inline
1064int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) 1053int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1065{ 1054{
1066 struct nfs_server *server = NFS_SERVER(inode); 1055 struct nfs_server *server = NFS_SERVER(inode);
1067 1056
1068 if (IS_AUTOMOUNT(inode)) 1057 if (IS_AUTOMOUNT(inode))
1069 return 0; 1058 return 0;
1070 if (nd != NULL) { 1059 /* VFS wants an on-the-wire revalidation */
1071 /* VFS wants an on-the-wire revalidation */ 1060 if (flags & LOOKUP_REVAL)
1072 if (nd->flags & LOOKUP_REVAL) 1061 goto out_force;
1073 goto out_force; 1062 /* This is an open(2) */
1074 /* This is an open(2) */ 1063 if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) &&
1075 if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && 1064 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
1076 !(server->flags & NFS_MOUNT_NOCTO) && 1065 goto out_force;
1077 (S_ISREG(inode->i_mode) || 1066 return 0;
1078 S_ISDIR(inode->i_mode)))
1079 goto out_force;
1080 return 0;
1081 }
1082 return nfs_revalidate_inode(server, inode);
1083out_force: 1067out_force:
1084 return __nfs_revalidate_inode(server, inode); 1068 return __nfs_revalidate_inode(server, inode);
1085} 1069}
@@ -1093,10 +1077,10 @@ out_force:
1093 */ 1077 */
1094static inline 1078static inline
1095int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, 1079int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1096 struct nameidata *nd) 1080 unsigned int flags)
1097{ 1081{
1098 /* Don't revalidate a negative dentry if we're creating a new file */ 1082 /* Don't revalidate a negative dentry if we're creating a new file */
1099 if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) 1083 if (flags & LOOKUP_CREATE)
1100 return 0; 1084 return 0;
1101 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) 1085 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1102 return 1; 1086 return 1;
@@ -1114,7 +1098,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1114 * If the parent directory is seen to have changed, we throw out the 1098 * If the parent directory is seen to have changed, we throw out the
1115 * cached dentry and do a new lookup. 1099 * cached dentry and do a new lookup.
1116 */ 1100 */
1117static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) 1101static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1118{ 1102{
1119 struct inode *dir; 1103 struct inode *dir;
1120 struct inode *inode; 1104 struct inode *inode;
@@ -1123,7 +1107,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1123 struct nfs_fattr *fattr = NULL; 1107 struct nfs_fattr *fattr = NULL;
1124 int error; 1108 int error;
1125 1109
1126 if (nd->flags & LOOKUP_RCU) 1110 if (flags & LOOKUP_RCU)
1127 return -ECHILD; 1111 return -ECHILD;
1128 1112
1129 parent = dget_parent(dentry); 1113 parent = dget_parent(dentry);
@@ -1132,7 +1116,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1132 inode = dentry->d_inode; 1116 inode = dentry->d_inode;
1133 1117
1134 if (!inode) { 1118 if (!inode) {
1135 if (nfs_neg_need_reval(dir, dentry, nd)) 1119 if (nfs_neg_need_reval(dir, dentry, flags))
1136 goto out_bad; 1120 goto out_bad;
1137 goto out_valid_noent; 1121 goto out_valid_noent;
1138 } 1122 }
@@ -1148,8 +1132,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1148 goto out_set_verifier; 1132 goto out_set_verifier;
1149 1133
1150 /* Force a full look up iff the parent directory has changed */ 1134 /* Force a full look up iff the parent directory has changed */
1151 if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { 1135 if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) {
1152 if (nfs_lookup_verify_inode(inode, nd)) 1136 if (nfs_lookup_verify_inode(inode, flags))
1153 goto out_zap_parent; 1137 goto out_zap_parent;
1154 goto out_valid; 1138 goto out_valid;
1155 } 1139 }
@@ -1286,7 +1270,7 @@ const struct dentry_operations nfs_dentry_operations = {
1286 .d_release = nfs_d_release, 1270 .d_release = nfs_d_release,
1287}; 1271};
1288 1272
1289static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1273static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
1290{ 1274{
1291 struct dentry *res; 1275 struct dentry *res;
1292 struct dentry *parent; 1276 struct dentry *parent;
@@ -1307,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1307 * If we're doing an exclusive create, optimize away the lookup 1291 * If we're doing an exclusive create, optimize away the lookup
1308 * but don't hash the dentry. 1292 * but don't hash the dentry.
1309 */ 1293 */
1310 if (nfs_is_exclusive_create(dir, nd)) { 1294 if (nfs_is_exclusive_create(dir, flags)) {
1311 d_instantiate(dentry, NULL); 1295 d_instantiate(dentry, NULL);
1312 res = NULL; 1296 res = NULL;
1313 goto out; 1297 goto out;
@@ -1354,7 +1338,7 @@ out:
1354} 1338}
1355 1339
1356#ifdef CONFIG_NFS_V4 1340#ifdef CONFIG_NFS_V4
1357static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); 1341static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
1358 1342
1359const struct dentry_operations nfs4_dentry_operations = { 1343const struct dentry_operations nfs4_dentry_operations = {
1360 .d_revalidate = nfs4_lookup_revalidate, 1344 .d_revalidate = nfs4_lookup_revalidate,
@@ -1364,24 +1348,6 @@ const struct dentry_operations nfs4_dentry_operations = {
1364 .d_release = nfs_d_release, 1348 .d_release = nfs_d_release,
1365}; 1349};
1366 1350
1367/*
1368 * Use intent information to determine whether we need to substitute
1369 * the NFSv4-style stateful OPEN for the LOOKUP call
1370 */
1371static int is_atomic_open(struct nameidata *nd)
1372{
1373 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0)
1374 return 0;
1375 /* NFS does not (yet) have a stateful open for directories */
1376 if (nd->flags & LOOKUP_DIRECTORY)
1377 return 0;
1378 /* Are we trying to write to a read only partition? */
1379 if (__mnt_is_readonly(nd->path.mnt) &&
1380 (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE)))
1381 return 0;
1382 return 1;
1383}
1384
1385static fmode_t flags_to_mode(int flags) 1351static fmode_t flags_to_mode(int flags)
1386{ 1352{
1387 fmode_t res = (__force fmode_t)flags & FMODE_EXEC; 1353 fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
@@ -1403,136 +1369,143 @@ static int do_open(struct inode *inode, struct file *filp)
1403 return 0; 1369 return 0;
1404} 1370}
1405 1371
1406static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) 1372static int nfs_finish_open(struct nfs_open_context *ctx,
1373 struct dentry *dentry,
1374 struct file *file, unsigned open_flags,
1375 int *opened)
1407{ 1376{
1408 struct file *filp; 1377 int err;
1409 int ret = 0; 1378
1379 if (ctx->dentry != dentry) {
1380 dput(ctx->dentry);
1381 ctx->dentry = dget(dentry);
1382 }
1410 1383
1411 /* If the open_intent is for execute, we have an extra check to make */ 1384 /* If the open_intent is for execute, we have an extra check to make */
1412 if (ctx->mode & FMODE_EXEC) { 1385 if (ctx->mode & FMODE_EXEC) {
1413 ret = nfs_may_open(ctx->dentry->d_inode, 1386 err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags);
1414 ctx->cred, 1387 if (err < 0)
1415 nd->intent.open.flags);
1416 if (ret < 0)
1417 goto out; 1388 goto out;
1418 } 1389 }
1419 filp = lookup_instantiate_filp(nd, ctx->dentry, do_open); 1390
1420 if (IS_ERR(filp)) 1391 err = finish_open(file, dentry, do_open, opened);
1421 ret = PTR_ERR(filp); 1392 if (err)
1422 else 1393 goto out;
1423 nfs_file_set_open_context(filp, ctx); 1394 nfs_file_set_open_context(file, ctx);
1395
1424out: 1396out:
1425 put_nfs_open_context(ctx); 1397 put_nfs_open_context(ctx);
1426 return ret; 1398 return err;
1427} 1399}
1428 1400
1429static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1401static int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
1402 struct file *file, unsigned open_flags,
1403 umode_t mode, int *opened)
1430{ 1404{
1431 struct nfs_open_context *ctx; 1405 struct nfs_open_context *ctx;
1432 struct iattr attr; 1406 struct dentry *res;
1433 struct dentry *res = NULL; 1407 struct iattr attr = { .ia_valid = ATTR_OPEN };
1434 struct inode *inode; 1408 struct inode *inode;
1435 int open_flags;
1436 int err; 1409 int err;
1437 1410
1438 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", 1411 /* Expect a negative dentry */
1412 BUG_ON(dentry->d_inode);
1413
1414 dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n",
1439 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1415 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1440 1416
1441 /* Check that we are indeed trying to open this file */ 1417 /* NFS only supports OPEN on regular files */
1442 if (!is_atomic_open(nd)) 1418 if ((open_flags & O_DIRECTORY)) {
1419 if (!d_unhashed(dentry)) {
1420 /*
1421 * Hashed negative dentry with O_DIRECTORY: dentry was
1422 * revalidated and is fine, no need to perform lookup
1423 * again
1424 */
1425 return -ENOENT;
1426 }
1443 goto no_open; 1427 goto no_open;
1444
1445 if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
1446 res = ERR_PTR(-ENAMETOOLONG);
1447 goto out;
1448 }
1449
1450 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
1451 * the dentry. */
1452 if (nd->flags & LOOKUP_EXCL) {
1453 d_instantiate(dentry, NULL);
1454 goto out;
1455 } 1428 }
1456 1429
1457 open_flags = nd->intent.open.flags; 1430 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
1458 attr.ia_valid = ATTR_OPEN; 1431 return -ENAMETOOLONG;
1459
1460 ctx = create_nfs_open_context(dentry, open_flags);
1461 res = ERR_CAST(ctx);
1462 if (IS_ERR(ctx))
1463 goto out;
1464 1432
1465 if (nd->flags & LOOKUP_CREATE) { 1433 if (open_flags & O_CREAT) {
1466 attr.ia_mode = nd->intent.open.create_mode;
1467 attr.ia_valid |= ATTR_MODE; 1434 attr.ia_valid |= ATTR_MODE;
1468 attr.ia_mode &= ~current_umask(); 1435 attr.ia_mode = mode & ~current_umask();
1469 } else 1436 }
1470 open_flags &= ~(O_EXCL | O_CREAT);
1471
1472 if (open_flags & O_TRUNC) { 1437 if (open_flags & O_TRUNC) {
1473 attr.ia_valid |= ATTR_SIZE; 1438 attr.ia_valid |= ATTR_SIZE;
1474 attr.ia_size = 0; 1439 attr.ia_size = 0;
1475 } 1440 }
1476 1441
1477 /* Open the file on the server */ 1442 ctx = create_nfs_open_context(dentry, open_flags);
1443 err = PTR_ERR(ctx);
1444 if (IS_ERR(ctx))
1445 goto out;
1446
1478 nfs_block_sillyrename(dentry->d_parent); 1447 nfs_block_sillyrename(dentry->d_parent);
1479 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); 1448 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
1449 d_drop(dentry);
1480 if (IS_ERR(inode)) { 1450 if (IS_ERR(inode)) {
1481 nfs_unblock_sillyrename(dentry->d_parent); 1451 nfs_unblock_sillyrename(dentry->d_parent);
1482 put_nfs_open_context(ctx); 1452 put_nfs_open_context(ctx);
1483 switch (PTR_ERR(inode)) { 1453 err = PTR_ERR(inode);
1484 /* Make a negative dentry */ 1454 switch (err) {
1485 case -ENOENT: 1455 case -ENOENT:
1486 d_add(dentry, NULL); 1456 d_add(dentry, NULL);
1487 res = NULL; 1457 break;
1488 goto out; 1458 case -EISDIR:
1489 /* This turned out not to be a regular file */ 1459 case -ENOTDIR:
1490 case -EISDIR: 1460 goto no_open;
1491 case -ENOTDIR: 1461 case -ELOOP:
1462 if (!(open_flags & O_NOFOLLOW))
1492 goto no_open; 1463 goto no_open;
1493 case -ELOOP: 1464 break;
1494 if (!(nd->intent.open.flags & O_NOFOLLOW))
1495 goto no_open;
1496 /* case -EINVAL: */ 1465 /* case -EINVAL: */
1497 default: 1466 default:
1498 res = ERR_CAST(inode); 1467 break;
1499 goto out;
1500 } 1468 }
1469 goto out;
1501 } 1470 }
1502 res = d_add_unique(dentry, inode); 1471 res = d_add_unique(dentry, inode);
1503 nfs_unblock_sillyrename(dentry->d_parent); 1472 if (res != NULL)
1504 if (res != NULL) {
1505 dput(ctx->dentry);
1506 ctx->dentry = dget(res);
1507 dentry = res; 1473 dentry = res;
1508 } 1474
1509 err = nfs_intent_set_file(nd, ctx); 1475 nfs_unblock_sillyrename(dentry->d_parent);
1510 if (err < 0) {
1511 if (res != NULL)
1512 dput(res);
1513 return ERR_PTR(err);
1514 }
1515out:
1516 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 1476 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1517 return res; 1477
1478 err = nfs_finish_open(ctx, dentry, file, open_flags, opened);
1479
1480 dput(res);
1481out:
1482 return err;
1483
1518no_open: 1484no_open:
1519 return nfs_lookup(dir, dentry, nd); 1485 res = nfs_lookup(dir, dentry, 0);
1486 err = PTR_ERR(res);
1487 if (IS_ERR(res))
1488 goto out;
1489
1490 return finish_no_open(file, res);
1520} 1491}
1521 1492
1522static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) 1493static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1523{ 1494{
1524 struct dentry *parent = NULL; 1495 struct dentry *parent = NULL;
1525 struct inode *inode; 1496 struct inode *inode;
1526 struct inode *dir; 1497 struct inode *dir;
1527 int openflags, ret = 0; 1498 int ret = 0;
1528 1499
1529 if (nd->flags & LOOKUP_RCU) 1500 if (flags & LOOKUP_RCU)
1530 return -ECHILD; 1501 return -ECHILD;
1531 1502
1532 inode = dentry->d_inode; 1503 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1533 if (!is_atomic_open(nd) || d_mountpoint(dentry)) 1504 goto no_open;
1505 if (d_mountpoint(dentry))
1534 goto no_open; 1506 goto no_open;
1535 1507
1508 inode = dentry->d_inode;
1536 parent = dget_parent(dentry); 1509 parent = dget_parent(dentry);
1537 dir = parent->d_inode; 1510 dir = parent->d_inode;
1538 1511
@@ -1540,7 +1513,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1540 * optimize away revalidation of negative dentries. 1513 * optimize away revalidation of negative dentries.
1541 */ 1514 */
1542 if (inode == NULL) { 1515 if (inode == NULL) {
1543 if (!nfs_neg_need_reval(dir, dentry, nd)) 1516 if (!nfs_neg_need_reval(dir, dentry, flags))
1544 ret = 1; 1517 ret = 1;
1545 goto out; 1518 goto out;
1546 } 1519 }
@@ -1548,9 +1521,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1548 /* NFS only supports OPEN on regular files */ 1521 /* NFS only supports OPEN on regular files */
1549 if (!S_ISREG(inode->i_mode)) 1522 if (!S_ISREG(inode->i_mode))
1550 goto no_open_dput; 1523 goto no_open_dput;
1551 openflags = nd->intent.open.flags;
1552 /* We cannot do exclusive creation on a positive dentry */ 1524 /* We cannot do exclusive creation on a positive dentry */
1553 if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) 1525 if (flags & LOOKUP_EXCL)
1554 goto no_open_dput; 1526 goto no_open_dput;
1555 1527
1556 /* Let f_op->open() actually open (and revalidate) the file */ 1528 /* Let f_op->open() actually open (and revalidate) the file */
@@ -1563,48 +1535,7 @@ out:
1563no_open_dput: 1535no_open_dput:
1564 dput(parent); 1536 dput(parent);
1565no_open: 1537no_open:
1566 return nfs_lookup_revalidate(dentry, nd); 1538 return nfs_lookup_revalidate(dentry, flags);
1567}
1568
1569static int nfs_open_create(struct inode *dir, struct dentry *dentry,
1570 umode_t mode, struct nameidata *nd)
1571{
1572 struct nfs_open_context *ctx = NULL;
1573 struct iattr attr;
1574 int error;
1575 int open_flags = O_CREAT|O_EXCL;
1576
1577 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1578 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1579
1580 attr.ia_mode = mode;
1581 attr.ia_valid = ATTR_MODE;
1582
1583 if (nd)
1584 open_flags = nd->intent.open.flags;
1585
1586 ctx = create_nfs_open_context(dentry, open_flags);
1587 error = PTR_ERR(ctx);
1588 if (IS_ERR(ctx))
1589 goto out_err_drop;
1590
1591 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
1592 if (error != 0)
1593 goto out_put_ctx;
1594 if (nd) {
1595 error = nfs_intent_set_file(nd, ctx);
1596 if (error < 0)
1597 goto out_err;
1598 } else {
1599 put_nfs_open_context(ctx);
1600 }
1601 return 0;
1602out_put_ctx:
1603 put_nfs_open_context(ctx);
1604out_err_drop:
1605 d_drop(dentry);
1606out_err:
1607 return error;
1608} 1539}
1609 1540
1610#endif /* CONFIG_NFSV4 */ 1541#endif /* CONFIG_NFSV4 */
@@ -1658,11 +1589,11 @@ out_error:
1658 * reply path made it appear to have failed. 1589 * reply path made it appear to have failed.
1659 */ 1590 */
1660static int nfs_create(struct inode *dir, struct dentry *dentry, 1591static int nfs_create(struct inode *dir, struct dentry *dentry,
1661 umode_t mode, struct nameidata *nd) 1592 umode_t mode, bool excl)
1662{ 1593{
1663 struct iattr attr; 1594 struct iattr attr;
1595 int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
1664 int error; 1596 int error;
1665 int open_flags = O_CREAT|O_EXCL;
1666 1597
1667 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1598 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1668 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1599 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1670,10 +1601,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry,
1670 attr.ia_mode = mode; 1601 attr.ia_mode = mode;
1671 attr.ia_valid = ATTR_MODE; 1602 attr.ia_valid = ATTR_MODE;
1672 1603
1673 if (nd) 1604 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
1674 open_flags = nd->intent.open.flags;
1675
1676 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL);
1677 if (error != 0) 1605 if (error != 0)
1678 goto out_err; 1606 goto out_err;
1679 return 0; 1607 return 0;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 8abfb19bd3aa..a67990f90bd7 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
62 */ 62 */
63 spin_lock(&sb->s_root->d_inode->i_lock); 63 spin_lock(&sb->s_root->d_inode->i_lock);
64 spin_lock(&sb->s_root->d_lock); 64 spin_lock(&sb->s_root->d_lock);
65 list_del_init(&sb->s_root->d_alias); 65 hlist_del_init(&sb->s_root->d_alias);
66 spin_unlock(&sb->s_root->d_lock); 66 spin_unlock(&sb->s_root->d_lock);
67 spin_unlock(&sb->s_root->d_inode->i_lock); 67 spin_unlock(&sb->s_root->d_inode->i_lock);
68 } 68 }
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2292a0fd2bff..3187e24e8f78 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
314 */ 314 */
315static int 315static int
316nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 316nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
317 int flags, struct nfs_open_context *ctx) 317 int flags)
318{ 318{
319 struct nfs3_createdata *data; 319 struct nfs3_createdata *data;
320 umode_t mode = sattr->ia_mode; 320 umode_t mode = sattr->ia_mode;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 15fc7e4664ed..c157b2089b47 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2806,37 +2806,22 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
2806} 2806}
2807 2807
2808/* 2808/*
2809 * Got race? 2809 * This is just for mknod. open(O_CREAT) will always do ->open_context().
2810 * We will need to arrange for the VFS layer to provide an atomic open.
2811 * Until then, this create/open method is prone to inefficiency and race
2812 * conditions due to the lookup, create, and open VFS calls from sys_open()
2813 * placed on the wire.
2814 *
2815 * Given the above sorry state of affairs, I'm simply sending an OPEN.
2816 * The file will be opened again in the subsequent VFS open call
2817 * (nfs4_proc_file_open).
2818 *
2819 * The open for read will just hang around to be used by any process that
2820 * opens the file O_RDONLY. This will all be resolved with the VFS changes.
2821 */ 2810 */
2822
2823static int 2811static int
2824nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 2812nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2825 int flags, struct nfs_open_context *ctx) 2813 int flags)
2826{ 2814{
2827 struct dentry *de = dentry; 2815 struct nfs_open_context *ctx;
2828 struct nfs4_state *state; 2816 struct nfs4_state *state;
2829 struct rpc_cred *cred = NULL;
2830 fmode_t fmode = 0;
2831 int status = 0; 2817 int status = 0;
2832 2818
2833 if (ctx != NULL) { 2819 ctx = alloc_nfs_open_context(dentry, FMODE_READ);
2834 cred = ctx->cred; 2820 if (IS_ERR(ctx))
2835 de = ctx->dentry; 2821 return PTR_ERR(ctx);
2836 fmode = ctx->mode; 2822
2837 }
2838 sattr->ia_mode &= ~current_umask(); 2823 sattr->ia_mode &= ~current_umask();
2839 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); 2824 state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL);
2840 d_drop(dentry); 2825 d_drop(dentry);
2841 if (IS_ERR(state)) { 2826 if (IS_ERR(state)) {
2842 status = PTR_ERR(state); 2827 status = PTR_ERR(state);
@@ -2844,11 +2829,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2844 } 2829 }
2845 d_add(dentry, igrab(state->inode)); 2830 d_add(dentry, igrab(state->inode));
2846 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 2831 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2847 if (ctx != NULL) 2832 ctx->state = state;
2848 ctx->state = state;
2849 else
2850 nfs4_close_sync(state, fmode);
2851out: 2833out:
2834 put_nfs_open_context(ctx);
2852 return status; 2835 return status;
2853} 2836}
2854 2837
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index b47277baebab..f50d3e8d6f22 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -454,7 +454,10 @@ int objio_read_pagelist(struct nfs_read_data *rdata)
454 objios->ios->done = _read_done; 454 objios->ios->done = _read_done;
455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 455 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
456 rdata->args.offset, rdata->args.count); 456 rdata->args.offset, rdata->args.count);
457 return ore_read(objios->ios); 457 ret = ore_read(objios->ios);
458 if (unlikely(ret))
459 objio_free_result(&objios->oir);
460 return ret;
458} 461}
459 462
460/* 463/*
@@ -486,8 +489,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
486 struct nfs_write_data *wdata = objios->oir.rpcdata; 489 struct nfs_write_data *wdata = objios->oir.rpcdata;
487 struct address_space *mapping = wdata->header->inode->i_mapping; 490 struct address_space *mapping = wdata->header->inode->i_mapping;
488 pgoff_t index = offset / PAGE_SIZE; 491 pgoff_t index = offset / PAGE_SIZE;
489 struct page *page = find_get_page(mapping, index); 492 struct page *page;
493 loff_t i_size = i_size_read(wdata->header->inode);
494
495 if (offset >= i_size) {
496 *uptodate = true;
497 dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
498 return ZERO_PAGE(0);
499 }
490 500
501 page = find_get_page(mapping, index);
491 if (!page) { 502 if (!page) {
492 page = find_or_create_page(mapping, index, GFP_NOFS); 503 page = find_or_create_page(mapping, index, GFP_NOFS);
493 if (unlikely(!page)) { 504 if (unlikely(!page)) {
@@ -507,8 +518,10 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
507 518
508static void __r4w_put_page(void *priv, struct page *page) 519static void __r4w_put_page(void *priv, struct page *page)
509{ 520{
510 dprintk("%s: index=0x%lx\n", __func__, page->index); 521 dprintk("%s: index=0x%lx\n", __func__,
511 page_cache_release(page); 522 (page == ZERO_PAGE(0)) ? -1UL : page->index);
523 if (ZERO_PAGE(0) != page)
524 page_cache_release(page);
512 return; 525 return;
513} 526}
514 527
@@ -539,8 +552,10 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
539 dprintk("%s: offset=0x%llx length=0x%x\n", __func__, 552 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
540 wdata->args.offset, wdata->args.count); 553 wdata->args.offset, wdata->args.count);
541 ret = ore_write(objios->ios); 554 ret = ore_write(objios->ios);
542 if (unlikely(ret)) 555 if (unlikely(ret)) {
556 objio_free_result(&objios->oir);
543 return ret; 557 return ret;
558 }
544 559
545 if (objios->sync) 560 if (objios->sync)
546 _write_done(objios->ios, objios); 561 _write_done(objios->ios, objios);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 617c7419a08e..4433806e116f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -259,7 +259,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
259 259
260static int 260static int
261nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 261nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
262 int flags, struct nfs_open_context *ctx) 262 int flags)
263{ 263{
264 struct nfs_createdata *data; 264 struct nfs_createdata *data;
265 struct rpc_message msg = { 265 struct rpc_message msg = {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 06228192f64e..8b2a2977b720 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2419,7 +2419,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
2419 sb_mntdata.mntflags |= MS_SYNCHRONOUS; 2419 sb_mntdata.mntflags |= MS_SYNCHRONOUS;
2420 2420
2421 /* Get a superblock - note that we may end up sharing one that already exists */ 2421 /* Get a superblock - note that we may end up sharing one that already exists */
2422 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); 2422 s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata);
2423 if (IS_ERR(s)) { 2423 if (IS_ERR(s)) {
2424 mntroot = ERR_CAST(s); 2424 mntroot = ERR_CAST(s);
2425 goto out_err_nosb; 2425 goto out_err_nosb;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c8bd9c3be7f7..4700a0a929d7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -745,7 +745,7 @@ __be32
745nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, 745nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
746 int may_flags, struct file **filp) 746 int may_flags, struct file **filp)
747{ 747{
748 struct dentry *dentry; 748 struct path path;
749 struct inode *inode; 749 struct inode *inode;
750 int flags = O_RDONLY|O_LARGEFILE; 750 int flags = O_RDONLY|O_LARGEFILE;
751 __be32 err; 751 __be32 err;
@@ -762,8 +762,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
762 if (err) 762 if (err)
763 goto out; 763 goto out;
764 764
765 dentry = fhp->fh_dentry; 765 path.mnt = fhp->fh_export->ex_path.mnt;
766 inode = dentry->d_inode; 766 path.dentry = fhp->fh_dentry;
767 inode = path.dentry->d_inode;
767 768
768 /* Disallow write access to files with the append-only bit set 769 /* Disallow write access to files with the append-only bit set
769 * or any access when mandatory locking enabled 770 * or any access when mandatory locking enabled
@@ -792,8 +793,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
792 else 793 else
793 flags = O_WRONLY|O_LARGEFILE; 794 flags = O_WRONLY|O_LARGEFILE;
794 } 795 }
795 *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), 796 *filp = dentry_open(&path, flags, current_cred());
796 flags, current_cred());
797 if (IS_ERR(*filp)) 797 if (IS_ERR(*filp))
798 host_err = PTR_ERR(*filp); 798 host_err = PTR_ERR(*filp);
799 else { 799 else {
@@ -1329,7 +1329,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1329 err = 0; 1329 err = 0;
1330 switch (type) { 1330 switch (type) {
1331 case S_IFREG: 1331 case S_IFREG:
1332 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1332 host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
1333 if (!host_err) 1333 if (!host_err)
1334 nfsd_check_ignore_resizing(iap); 1334 nfsd_check_ignore_resizing(iap);
1335 break; 1335 break;
@@ -1492,7 +1492,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1492 goto out; 1492 goto out;
1493 } 1493 }
1494 1494
1495 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1495 host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
1496 if (host_err < 0) { 1496 if (host_err < 0) {
1497 fh_drop_write(fhp); 1497 fh_drop_write(fhp);
1498 goto out_nfserr; 1498 goto out_nfserr;
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index b72847988b78..1d0c0b84c5a3 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -63,7 +63,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
63 */ 63 */
64 64
65static struct dentry * 65static struct dentry *
66nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 66nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
67{ 67{
68 struct inode *inode; 68 struct inode *inode;
69 ino_t ino; 69 ino_t ino;
@@ -85,7 +85,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
85 * with d_instantiate(). 85 * with d_instantiate().
86 */ 86 */
87static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 87static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
88 struct nameidata *nd) 88 bool excl)
89{ 89{
90 struct inode *inode; 90 struct inode *inode;
91 struct nilfs_transaction_info ti; 91 struct nilfs_transaction_info ti;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1099a76cee59..d57c42f974ea 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1288,7 +1288,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1288 err = -EBUSY; 1288 err = -EBUSY;
1289 goto failed; 1289 goto failed;
1290 } 1290 }
1291 s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); 1291 s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
1292 sd.bdev);
1292 mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); 1293 mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
1293 if (IS_ERR(s)) { 1294 if (IS_ERR(s)) {
1294 err = PTR_ERR(s); 1295 err = PTR_ERR(s);
@@ -1301,7 +1302,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
1301 s_new = true; 1302 s_new = true;
1302 1303
1303 /* New superblock instance created */ 1304 /* New superblock instance created */
1304 s->s_flags = flags;
1305 s->s_mode = mode; 1305 s->s_mode = mode;
1306 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); 1306 strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
1307 sb_set_blocksize(s, block_size(sd.bdev)); 1307 sb_set_blocksize(s, block_size(sd.bdev));
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 3568c8a8b138..d43803669739 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -61,8 +61,6 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
61static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) 61static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
62{ 62{
63 int client_fd; 63 int client_fd;
64 struct dentry *dentry;
65 struct vfsmount *mnt;
66 struct file *new_file; 64 struct file *new_file;
67 65
68 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 66 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
@@ -81,12 +79,10 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
81 * we need a new file handle for the userspace program so it can read even if it was 79 * we need a new file handle for the userspace program so it can read even if it was
82 * originally opened O_WRONLY. 80 * originally opened O_WRONLY.
83 */ 81 */
84 dentry = dget(event->path.dentry);
85 mnt = mntget(event->path.mnt);
86 /* it's possible this event was an overflow event. in that case dentry and mnt 82 /* it's possible this event was an overflow event. in that case dentry and mnt
87 * are NULL; That's fine, just don't call dentry open */ 83 * are NULL; That's fine, just don't call dentry open */
88 if (dentry && mnt) 84 if (event->path.dentry && event->path.mnt)
89 new_file = dentry_open(dentry, mnt, 85 new_file = dentry_open(&event->path,
90 group->fanotify_data.f_flags | FMODE_NONOTIFY, 86 group->fanotify_data.f_flags | FMODE_NONOTIFY,
91 current_cred()); 87 current_cred());
92 else 88 else
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index b39c5c161adb..6baadb5a8430 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -52,6 +52,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
52void __fsnotify_update_child_dentry_flags(struct inode *inode) 52void __fsnotify_update_child_dentry_flags(struct inode *inode)
53{ 53{
54 struct dentry *alias; 54 struct dentry *alias;
55 struct hlist_node *p;
55 int watched; 56 int watched;
56 57
57 if (!S_ISDIR(inode->i_mode)) 58 if (!S_ISDIR(inode->i_mode))
@@ -63,7 +64,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
63 spin_lock(&inode->i_lock); 64 spin_lock(&inode->i_lock);
64 /* run all of the dentries associated with this inode. Since this is a 65 /* run all of the dentries associated with this inode. Since this is a
65 * directory, there damn well better only be one item on this list */ 66 * directory, there damn well better only be one item on this list */
66 list_for_each_entry(alias, &inode->i_dentry, d_alias) { 67 hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
67 struct dentry *child; 68 struct dentry *child;
68 69
69 /* run all of the children of the original inode and fix their 70 /* run all of the children of the original inode and fix their
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 358273e59ade..436f36037e09 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -101,7 +101,7 @@
101 * Locking: Caller must hold i_mutex on the directory. 101 * Locking: Caller must hold i_mutex on the directory.
102 */ 102 */
103static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, 103static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent,
104 struct nameidata *nd) 104 unsigned int flags)
105{ 105{
106 ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); 106 ntfs_volume *vol = NTFS_SB(dir_ino->i_sb);
107 struct inode *dent_inode; 107 struct inode *dent_inode;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index e5ba34818332..8db4b58b2e4b 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -49,14 +49,13 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
49} 49}
50 50
51 51
52static int ocfs2_dentry_revalidate(struct dentry *dentry, 52static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
53 struct nameidata *nd)
54{ 53{
55 struct inode *inode; 54 struct inode *inode;
56 int ret = 0; /* if all else fails, just return false */ 55 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb; 56 struct ocfs2_super *osb;
58 57
59 if (nd && nd->flags & LOOKUP_RCU) 58 if (flags & LOOKUP_RCU)
60 return -ECHILD; 59 return -ECHILD;
61 60
62 inode = dentry->d_inode; 61 inode = dentry->d_inode;
@@ -170,13 +169,11 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
170 u64 parent_blkno, 169 u64 parent_blkno,
171 int skip_unhashed) 170 int skip_unhashed)
172{ 171{
173 struct list_head *p; 172 struct hlist_node *p;
174 struct dentry *dentry = NULL; 173 struct dentry *dentry;
175 174
176 spin_lock(&inode->i_lock); 175 spin_lock(&inode->i_lock);
177 list_for_each(p, &inode->i_dentry) { 176 hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
178 dentry = list_entry(p, struct dentry, d_alias);
179
180 spin_lock(&dentry->d_lock); 177 spin_lock(&dentry->d_lock);
181 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 178 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
182 trace_ocfs2_find_local_alias(dentry->d_name.len, 179 trace_ocfs2_find_local_alias(dentry->d_name.len,
@@ -184,16 +181,13 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
184 181
185 dget_dlock(dentry); 182 dget_dlock(dentry);
186 spin_unlock(&dentry->d_lock); 183 spin_unlock(&dentry->d_lock);
187 break; 184 spin_unlock(&inode->i_lock);
185 return dentry;
188 } 186 }
189 spin_unlock(&dentry->d_lock); 187 spin_unlock(&dentry->d_lock);
190
191 dentry = NULL;
192 } 188 }
193
194 spin_unlock(&inode->i_lock); 189 spin_unlock(&inode->i_lock);
195 190 return NULL;
196 return dentry;
197} 191}
198 192
199DEFINE_SPINLOCK(dentry_attach_lock); 193DEFINE_SPINLOCK(dentry_attach_lock);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index e31d6ae013ab..83b6f98e0665 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -526,7 +526,7 @@ bail:
526static int dlmfs_create(struct inode *dir, 526static int dlmfs_create(struct inode *dir,
527 struct dentry *dentry, 527 struct dentry *dentry,
528 umode_t mode, 528 umode_t mode,
529 struct nameidata *nd) 529 bool excl)
530{ 530{
531 int status = 0; 531 int status = 0;
532 struct inode *inode; 532 struct inode *inode;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 9f39c640cddf..f1fd0741162b 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -98,7 +98,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
98#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) 98#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
99 99
100static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, 100static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
101 struct nameidata *nd) 101 unsigned int flags)
102{ 102{
103 int status; 103 int status;
104 u64 blkno; 104 u64 blkno;
@@ -618,7 +618,7 @@ static int ocfs2_mkdir(struct inode *dir,
618static int ocfs2_create(struct inode *dir, 618static int ocfs2_create(struct inode *dir,
619 struct dentry *dentry, 619 struct dentry *dentry,
620 umode_t mode, 620 umode_t mode,
621 struct nameidata *nd) 621 bool excl)
622{ 622{
623 int ret; 623 int ret;
624 624
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index f00576ec320f..fb5b3ff79dc6 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -285,13 +285,13 @@ static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
285} 285}
286 286
287static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 287static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
288 struct nameidata *nd) 288 bool excl)
289{ 289{
290 return omfs_add_node(dir, dentry, mode | S_IFREG); 290 return omfs_add_node(dir, dentry, mode | S_IFREG);
291} 291}
292 292
293static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry, 293static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
294 struct nameidata *nd) 294 unsigned int flags)
295{ 295{
296 struct buffer_head *bh; 296 struct buffer_head *bh;
297 struct inode *inode = NULL; 297 struct inode *inode = NULL;
diff --git a/fs/open.c b/fs/open.c
index 1540632d8387..1e914b397e12 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -537,25 +537,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group)
537 return error; 537 return error;
538} 538}
539 539
540SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
541{
542 struct path path;
543 int error;
544
545 error = user_path(filename, &path);
546 if (error)
547 goto out;
548 error = mnt_want_write(path.mnt);
549 if (error)
550 goto out_release;
551 error = chown_common(&path, user, group);
552 mnt_drop_write(path.mnt);
553out_release:
554 path_put(&path);
555out:
556 return error;
557}
558
559SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, 540SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
560 gid_t, group, int, flag) 541 gid_t, group, int, flag)
561{ 542{
@@ -583,23 +564,15 @@ out:
583 return error; 564 return error;
584} 565}
585 566
586SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) 567SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
587{ 568{
588 struct path path; 569 return sys_fchownat(AT_FDCWD, filename, user, group, 0);
589 int error; 570}
590 571
591 error = user_lpath(filename, &path); 572SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
592 if (error) 573{
593 goto out; 574 return sys_fchownat(AT_FDCWD, filename, user, group,
594 error = mnt_want_write(path.mnt); 575 AT_SYMLINK_NOFOLLOW);
595 if (error)
596 goto out_release;
597 error = chown_common(&path, user, group);
598 mnt_drop_write(path.mnt);
599out_release:
600 path_put(&path);
601out:
602 return error;
603} 576}
604 577
605SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) 578SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
@@ -667,10 +640,9 @@ int open_check_o_direct(struct file *f)
667 return 0; 640 return 0;
668} 641}
669 642
670static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, 643static int do_dentry_open(struct file *f,
671 struct file *f, 644 int (*open)(struct inode *, struct file *),
672 int (*open)(struct inode *, struct file *), 645 const struct cred *cred)
673 const struct cred *cred)
674{ 646{
675 static const struct file_operations empty_fops = {}; 647 static const struct file_operations empty_fops = {};
676 struct inode *inode; 648 struct inode *inode;
@@ -682,9 +654,9 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
682 if (unlikely(f->f_flags & O_PATH)) 654 if (unlikely(f->f_flags & O_PATH))
683 f->f_mode = FMODE_PATH; 655 f->f_mode = FMODE_PATH;
684 656
685 inode = dentry->d_inode; 657 inode = f->f_path.dentry->d_inode;
686 if (f->f_mode & FMODE_WRITE) { 658 if (f->f_mode & FMODE_WRITE) {
687 error = __get_file_write_access(inode, mnt); 659 error = __get_file_write_access(inode, f->f_path.mnt);
688 if (error) 660 if (error)
689 goto cleanup_file; 661 goto cleanup_file;
690 if (!special_file(inode->i_mode)) 662 if (!special_file(inode->i_mode))
@@ -692,14 +664,12 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
692 } 664 }
693 665
694 f->f_mapping = inode->i_mapping; 666 f->f_mapping = inode->i_mapping;
695 f->f_path.dentry = dentry;
696 f->f_path.mnt = mnt;
697 f->f_pos = 0; 667 f->f_pos = 0;
698 file_sb_list_add(f, inode->i_sb); 668 file_sb_list_add(f, inode->i_sb);
699 669
700 if (unlikely(f->f_mode & FMODE_PATH)) { 670 if (unlikely(f->f_mode & FMODE_PATH)) {
701 f->f_op = &empty_fops; 671 f->f_op = &empty_fops;
702 return f; 672 return 0;
703 } 673 }
704 674
705 f->f_op = fops_get(inode->i_fop); 675 f->f_op = fops_get(inode->i_fop);
@@ -726,10 +696,11 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt,
726 696
727 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); 697 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
728 698
729 return f; 699 return 0;
730 700
731cleanup_all: 701cleanup_all:
732 fops_put(f->f_op); 702 fops_put(f->f_op);
703 file_sb_list_del(f);
733 if (f->f_mode & FMODE_WRITE) { 704 if (f->f_mode & FMODE_WRITE) {
734 put_write_access(inode); 705 put_write_access(inode);
735 if (!special_file(inode->i_mode)) { 706 if (!special_file(inode->i_mode)) {
@@ -740,124 +711,62 @@ cleanup_all:
740 * here, so just reset the state. 711 * here, so just reset the state.
741 */ 712 */
742 file_reset_write(f); 713 file_reset_write(f);
743 mnt_drop_write(mnt); 714 mnt_drop_write(f->f_path.mnt);
744 } 715 }
745 } 716 }
746 file_sb_list_del(f);
747 f->f_path.dentry = NULL;
748 f->f_path.mnt = NULL;
749cleanup_file: 717cleanup_file:
750 dput(dentry); 718 path_put(&f->f_path);
751 mntput(mnt); 719 f->f_path.mnt = NULL;
752 return ERR_PTR(error); 720 f->f_path.dentry = NULL;
753} 721 return error;
754
755static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
756 struct file *f,
757 int (*open)(struct inode *, struct file *),
758 const struct cred *cred)
759{
760 struct file *res = do_dentry_open(dentry, mnt, f, open, cred);
761 if (!IS_ERR(res)) {
762 int error = open_check_o_direct(f);
763 if (error) {
764 fput(res);
765 res = ERR_PTR(error);
766 }
767 } else {
768 put_filp(f);
769 }
770 return res;
771} 722}
772 723
773/** 724/**
774 * lookup_instantiate_filp - instantiates the open intent filp 725 * finish_open - finish opening a file
775 * @nd: pointer to nameidata 726 * @od: opaque open data
776 * @dentry: pointer to dentry 727 * @dentry: pointer to dentry
777 * @open: open callback 728 * @open: open callback
778 * 729 *
779 * Helper for filesystems that want to use lookup open intents and pass back 730 * This can be used to finish opening a file passed to i_op->atomic_open().
780 * a fully instantiated struct file to the caller. 731 *
781 * This function is meant to be called from within a filesystem's
782 * lookup method.
783 * Beware of calling it for non-regular files! Those ->open methods might block
784 * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo,
785 * leading to a deadlock, as nobody can open that fifo anymore, because
786 * another process to open fifo will block on locked parent when doing lookup).
787 * Note that in case of error, nd->intent.open.file is destroyed, but the
788 * path information remains valid.
789 * If the open callback is set to NULL, then the standard f_op->open() 732 * If the open callback is set to NULL, then the standard f_op->open()
790 * filesystem callback is substituted. 733 * filesystem callback is substituted.
791 */ 734 */
792struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, 735int finish_open(struct file *file, struct dentry *dentry,
793 int (*open)(struct inode *, struct file *)) 736 int (*open)(struct inode *, struct file *),
737 int *opened)
794{ 738{
795 const struct cred *cred = current_cred(); 739 int error;
740 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
796 741
797 if (IS_ERR(nd->intent.open.file)) 742 mntget(file->f_path.mnt);
798 goto out; 743 file->f_path.dentry = dget(dentry);
799 if (IS_ERR(dentry)) 744
800 goto out_err; 745 error = do_dentry_open(file, open, current_cred());
801 nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), 746 if (!error)
802 nd->intent.open.file, 747 *opened |= FILE_OPENED;
803 open, cred); 748
804out: 749 return error;
805 return nd->intent.open.file;
806out_err:
807 release_open_intent(nd);
808 nd->intent.open.file = ERR_CAST(dentry);
809 goto out;
810} 750}
811EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 751EXPORT_SYMBOL(finish_open);
812 752
813/** 753/**
814 * nameidata_to_filp - convert a nameidata to an open filp. 754 * finish_no_open - finish ->atomic_open() without opening the file
815 * @nd: pointer to nameidata 755 *
816 * @flags: open flags 756 * @od: opaque open data
757 * @dentry: dentry or NULL (as returned from ->lookup())
817 * 758 *
818 * Note that this function destroys the original nameidata 759 * This can be used to set the result of a successful lookup in ->atomic_open().
760 * The filesystem's atomic_open() method shall return NULL after calling this.
819 */ 761 */
820struct file *nameidata_to_filp(struct nameidata *nd) 762int finish_no_open(struct file *file, struct dentry *dentry)
821{ 763{
822 const struct cred *cred = current_cred(); 764 file->f_path.dentry = dentry;
823 struct file *filp; 765 return 1;
824
825 /* Pick up the filp from the open intent */
826 filp = nd->intent.open.file;
827
828 /* Has the filesystem initialised the file for us? */
829 if (filp->f_path.dentry != NULL) {
830 nd->intent.open.file = NULL;
831 } else {
832 struct file *res;
833
834 path_get(&nd->path);
835 res = do_dentry_open(nd->path.dentry, nd->path.mnt,
836 filp, NULL, cred);
837 if (!IS_ERR(res)) {
838 int error;
839
840 nd->intent.open.file = NULL;
841 BUG_ON(res != filp);
842
843 error = open_check_o_direct(filp);
844 if (error) {
845 fput(filp);
846 filp = ERR_PTR(error);
847 }
848 } else {
849 /* Allow nd->intent.open.file to be recycled */
850 filp = res;
851 }
852 }
853 return filp;
854} 766}
767EXPORT_SYMBOL(finish_no_open);
855 768
856/* 769struct file *dentry_open(const struct path *path, int flags,
857 * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
858 * error.
859 */
860struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
861 const struct cred *cred) 770 const struct cred *cred)
862{ 771{
863 int error; 772 int error;
@@ -866,18 +775,28 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
866 validate_creds(cred); 775 validate_creds(cred);
867 776
868 /* We must always pass in a valid mount pointer. */ 777 /* We must always pass in a valid mount pointer. */
869 BUG_ON(!mnt); 778 BUG_ON(!path->mnt);
870 779
871 error = -ENFILE; 780 error = -ENFILE;
872 f = get_empty_filp(); 781 f = get_empty_filp();
873 if (f == NULL) { 782 if (f == NULL)
874 dput(dentry);
875 mntput(mnt);
876 return ERR_PTR(error); 783 return ERR_PTR(error);
877 }
878 784
879 f->f_flags = flags; 785 f->f_flags = flags;
880 return __dentry_open(dentry, mnt, f, NULL, cred); 786 f->f_path = *path;
787 path_get(&f->f_path);
788 error = do_dentry_open(f, NULL, cred);
789 if (!error) {
790 error = open_check_o_direct(f);
791 if (error) {
792 fput(f);
793 f = ERR_PTR(error);
794 }
795 } else {
796 put_filp(f);
797 f = ERR_PTR(error);
798 }
799 return f;
881} 800}
882EXPORT_SYMBOL(dentry_open); 801EXPORT_SYMBOL(dentry_open);
883 802
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index bc49c975d501..4a3477949bca 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -170,13 +170,13 @@ static const struct file_operations openprom_operations = {
170 .llseek = generic_file_llseek, 170 .llseek = generic_file_llseek,
171}; 171};
172 172
173static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); 173static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, unsigned int);
174 174
175static const struct inode_operations openprom_inode_operations = { 175static const struct inode_operations openprom_inode_operations = {
176 .lookup = openpromfs_lookup, 176 .lookup = openpromfs_lookup,
177}; 177};
178 178
179static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 179static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
180{ 180{
181 struct op_inode_info *ent_oi, *oi = OP_I(dir); 181 struct op_inode_info *ent_oi, *oi = OP_I(dir);
182 struct device_node *dp, *child; 182 struct device_node *dp, *child;
diff --git a/fs/pnode.c b/fs/pnode.c
index bed378db0758..3e000a51ac0d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -237,8 +237,9 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
237 237
238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
239 239
240 if (!(child = copy_tree(source, source->mnt.mnt_root, type))) { 240 child = copy_tree(source, source->mnt.mnt_root, type);
241 ret = -ENOMEM; 241 if (IS_ERR(child)) {
242 ret = PTR_ERR(child);
242 list_splice(tree_list, tmp_list.prev); 243 list_splice(tree_list, tmp_list.prev);
243 goto out; 244 goto out;
244 } 245 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 437195f204e1..2772208338f8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1427,16 +1427,19 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
1427static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) 1427static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1428{ 1428{
1429 struct inode *inode = dentry->d_inode; 1429 struct inode *inode = dentry->d_inode;
1430 struct path path;
1430 int error = -EACCES; 1431 int error = -EACCES;
1431 1432
1432 /* We don't need a base pointer in the /proc filesystem */
1433 path_put(&nd->path);
1434
1435 /* Are we allowed to snoop on the tasks file descriptors? */ 1433 /* Are we allowed to snoop on the tasks file descriptors? */
1436 if (!proc_fd_access_allowed(inode)) 1434 if (!proc_fd_access_allowed(inode))
1437 goto out; 1435 goto out;
1438 1436
1439 error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); 1437 error = PROC_I(inode)->op.proc_get_link(dentry, &path);
1438 if (error)
1439 goto out;
1440
1441 nd_jump_link(nd, &path);
1442 return NULL;
1440out: 1443out:
1441 return ERR_PTR(error); 1444 return ERR_PTR(error);
1442} 1445}
@@ -1601,13 +1604,13 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
1601 * made this apply to all per process world readable and executable 1604 * made this apply to all per process world readable and executable
1602 * directories. 1605 * directories.
1603 */ 1606 */
1604int pid_revalidate(struct dentry *dentry, struct nameidata *nd) 1607int pid_revalidate(struct dentry *dentry, unsigned int flags)
1605{ 1608{
1606 struct inode *inode; 1609 struct inode *inode;
1607 struct task_struct *task; 1610 struct task_struct *task;
1608 const struct cred *cred; 1611 const struct cred *cred;
1609 1612
1610 if (nd && nd->flags & LOOKUP_RCU) 1613 if (flags & LOOKUP_RCU)
1611 return -ECHILD; 1614 return -ECHILD;
1612 1615
1613 inode = dentry->d_inode; 1616 inode = dentry->d_inode;
@@ -1781,7 +1784,7 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
1781 return proc_fd_info(dentry->d_inode, path, NULL); 1784 return proc_fd_info(dentry->d_inode, path, NULL);
1782} 1785}
1783 1786
1784static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) 1787static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
1785{ 1788{
1786 struct inode *inode; 1789 struct inode *inode;
1787 struct task_struct *task; 1790 struct task_struct *task;
@@ -1789,7 +1792,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1789 struct files_struct *files; 1792 struct files_struct *files;
1790 const struct cred *cred; 1793 const struct cred *cred;
1791 1794
1792 if (nd && nd->flags & LOOKUP_RCU) 1795 if (flags & LOOKUP_RCU)
1793 return -ECHILD; 1796 return -ECHILD;
1794 1797
1795 inode = dentry->d_inode; 1798 inode = dentry->d_inode;
@@ -1868,7 +1871,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1868 d_set_d_op(dentry, &tid_fd_dentry_operations); 1871 d_set_d_op(dentry, &tid_fd_dentry_operations);
1869 d_add(dentry, inode); 1872 d_add(dentry, inode);
1870 /* Close the race of the process dying before we return the dentry */ 1873 /* Close the race of the process dying before we return the dentry */
1871 if (tid_fd_revalidate(dentry, NULL)) 1874 if (tid_fd_revalidate(dentry, 0))
1872 error = NULL; 1875 error = NULL;
1873 1876
1874 out: 1877 out:
@@ -1956,7 +1959,7 @@ out_no_task:
1956} 1959}
1957 1960
1958static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, 1961static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
1959 struct nameidata *nd) 1962 unsigned int flags)
1960{ 1963{
1961 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); 1964 return proc_lookupfd_common(dir, dentry, proc_fd_instantiate);
1962} 1965}
@@ -2003,7 +2006,7 @@ static int dname_to_vma_addr(struct dentry *dentry,
2003 return 0; 2006 return 0;
2004} 2007}
2005 2008
2006static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) 2009static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
2007{ 2010{
2008 unsigned long vm_start, vm_end; 2011 unsigned long vm_start, vm_end;
2009 bool exact_vma_exists = false; 2012 bool exact_vma_exists = false;
@@ -2013,7 +2016,7 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd)
2013 struct inode *inode; 2016 struct inode *inode;
2014 int status = 0; 2017 int status = 0;
2015 2018
2016 if (nd && nd->flags & LOOKUP_RCU) 2019 if (flags & LOOKUP_RCU)
2017 return -ECHILD; 2020 return -ECHILD;
2018 2021
2019 if (!capable(CAP_SYS_ADMIN)) { 2022 if (!capable(CAP_SYS_ADMIN)) {
@@ -2145,7 +2148,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
2145} 2148}
2146 2149
2147static struct dentry *proc_map_files_lookup(struct inode *dir, 2150static struct dentry *proc_map_files_lookup(struct inode *dir,
2148 struct dentry *dentry, struct nameidata *nd) 2151 struct dentry *dentry, unsigned int flags)
2149{ 2152{
2150 unsigned long vm_start, vm_end; 2153 unsigned long vm_start, vm_end;
2151 struct vm_area_struct *vma; 2154 struct vm_area_struct *vma;
@@ -2371,7 +2374,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2371 d_set_d_op(dentry, &tid_fd_dentry_operations); 2374 d_set_d_op(dentry, &tid_fd_dentry_operations);
2372 d_add(dentry, inode); 2375 d_add(dentry, inode);
2373 /* Close the race of the process dying before we return the dentry */ 2376 /* Close the race of the process dying before we return the dentry */
2374 if (tid_fd_revalidate(dentry, NULL)) 2377 if (tid_fd_revalidate(dentry, 0))
2375 error = NULL; 2378 error = NULL;
2376 2379
2377 out: 2380 out:
@@ -2380,7 +2383,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir,
2380 2383
2381static struct dentry *proc_lookupfdinfo(struct inode *dir, 2384static struct dentry *proc_lookupfdinfo(struct inode *dir,
2382 struct dentry *dentry, 2385 struct dentry *dentry,
2383 struct nameidata *nd) 2386 unsigned int flags)
2384{ 2387{
2385 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); 2388 return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
2386} 2389}
@@ -2430,7 +2433,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
2430 d_set_d_op(dentry, &pid_dentry_operations); 2433 d_set_d_op(dentry, &pid_dentry_operations);
2431 d_add(dentry, inode); 2434 d_add(dentry, inode);
2432 /* Close the race of the process dying before we return the dentry */ 2435 /* Close the race of the process dying before we return the dentry */
2433 if (pid_revalidate(dentry, NULL)) 2436 if (pid_revalidate(dentry, 0))
2434 error = NULL; 2437 error = NULL;
2435out: 2438out:
2436 return error; 2439 return error;
@@ -2630,7 +2633,7 @@ static const struct file_operations proc_attr_dir_operations = {
2630}; 2633};
2631 2634
2632static struct dentry *proc_attr_dir_lookup(struct inode *dir, 2635static struct dentry *proc_attr_dir_lookup(struct inode *dir,
2633 struct dentry *dentry, struct nameidata *nd) 2636 struct dentry *dentry, unsigned int flags)
2634{ 2637{
2635 return proc_pident_lookup(dir, dentry, 2638 return proc_pident_lookup(dir, dentry,
2636 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); 2639 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
@@ -3114,7 +3117,8 @@ static const struct file_operations proc_tgid_base_operations = {
3114 .llseek = default_llseek, 3117 .llseek = default_llseek,
3115}; 3118};
3116 3119
3117static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 3120static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
3121{
3118 return proc_pident_lookup(dir, dentry, 3122 return proc_pident_lookup(dir, dentry,
3119 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); 3123 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
3120} 3124}
@@ -3237,13 +3241,13 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
3237 3241
3238 d_add(dentry, inode); 3242 d_add(dentry, inode);
3239 /* Close the race of the process dying before we return the dentry */ 3243 /* Close the race of the process dying before we return the dentry */
3240 if (pid_revalidate(dentry, NULL)) 3244 if (pid_revalidate(dentry, 0))
3241 error = NULL; 3245 error = NULL;
3242out: 3246out:
3243 return error; 3247 return error;
3244} 3248}
3245 3249
3246struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 3250struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
3247{ 3251{
3248 struct dentry *result; 3252 struct dentry *result;
3249 struct task_struct *task; 3253 struct task_struct *task;
@@ -3470,7 +3474,8 @@ static int proc_tid_base_readdir(struct file * filp,
3470 tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); 3474 tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
3471} 3475}
3472 3476
3473static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ 3477static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
3478{
3474 return proc_pident_lookup(dir, dentry, 3479 return proc_pident_lookup(dir, dentry,
3475 tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); 3480 tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
3476} 3481}
@@ -3508,13 +3513,13 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
3508 3513
3509 d_add(dentry, inode); 3514 d_add(dentry, inode);
3510 /* Close the race of the process dying before we return the dentry */ 3515 /* Close the race of the process dying before we return the dentry */
3511 if (pid_revalidate(dentry, NULL)) 3516 if (pid_revalidate(dentry, 0))
3512 error = NULL; 3517 error = NULL;
3513out: 3518out:
3514 return error; 3519 return error;
3515} 3520}
3516 3521
3517static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 3522static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
3518{ 3523{
3519 struct dentry *result = ERR_PTR(-ENOENT); 3524 struct dentry *result = ERR_PTR(-ENOENT);
3520 struct task_struct *task; 3525 struct task_struct *task;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 2edf34f2eb61..b3647fe6a608 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -446,7 +446,7 @@ out_unlock:
446} 446}
447 447
448struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, 448struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
449 struct nameidata *nd) 449 unsigned int flags)
450{ 450{
451 return proc_lookup_de(PDE(dir), dir, dentry); 451 return proc_lookup_de(PDE(dir), dir, dentry);
452} 452}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index eca4aca5b6e2..e1167a1c9126 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -106,7 +106,7 @@ void pde_users_dec(struct proc_dir_entry *pde);
106 106
107extern spinlock_t proc_subdir_lock; 107extern spinlock_t proc_subdir_lock;
108 108
109struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); 109struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int);
110int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); 110int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
111unsigned long task_vsize(struct mm_struct *); 111unsigned long task_vsize(struct mm_struct *);
112unsigned long task_statm(struct mm_struct *, 112unsigned long task_statm(struct mm_struct *,
@@ -132,7 +132,7 @@ int proc_remount(struct super_block *sb, int *flags, char *data);
132 * of the /proc/<pid> subdirectories. 132 * of the /proc/<pid> subdirectories.
133 */ 133 */
134int proc_readdir(struct file *, void *, filldir_t); 134int proc_readdir(struct file *, void *, filldir_t);
135struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); 135struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
136 136
137 137
138 138
@@ -142,7 +142,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
142int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 142int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
143 const char *name, int len, 143 const char *name, int len,
144 instantiate_t instantiate, struct task_struct *task, const void *ptr); 144 instantiate_t instantiate, struct task_struct *task, const void *ptr);
145int pid_revalidate(struct dentry *dentry, struct nameidata *nd); 145int pid_revalidate(struct dentry *dentry, unsigned int flags);
146struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); 146struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
147extern const struct dentry_operations pid_dentry_operations; 147extern const struct dentry_operations pid_dentry_operations;
148int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 148int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 0d9e23a39e49..b178ed733c36 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -56,7 +56,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
56 d_set_d_op(dentry, &pid_dentry_operations); 56 d_set_d_op(dentry, &pid_dentry_operations);
57 d_add(dentry, inode); 57 d_add(dentry, inode);
58 /* Close the race of the process dying before we return the dentry */ 58 /* Close the race of the process dying before we return the dentry */
59 if (pid_revalidate(dentry, NULL)) 59 if (pid_revalidate(dentry, 0))
60 error = NULL; 60 error = NULL;
61out: 61out:
62 return error; 62 return error;
@@ -140,7 +140,7 @@ const struct file_operations proc_ns_dir_operations = {
140}; 140};
141 141
142static struct dentry *proc_ns_dir_lookup(struct inode *dir, 142static struct dentry *proc_ns_dir_lookup(struct inode *dir,
143 struct dentry *dentry, struct nameidata *nd) 143 struct dentry *dentry, unsigned int flags)
144{ 144{
145 struct dentry *error; 145 struct dentry *error;
146 struct task_struct *task = get_proc_task(dir); 146 struct task_struct *task = get_proc_task(dir);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 927cbd115e53..df7dd08d4391 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -101,6 +101,11 @@ void proc_device_tree_update_prop(struct proc_dir_entry *pde,
101{ 101{
102 struct proc_dir_entry *ent; 102 struct proc_dir_entry *ent;
103 103
104 if (!oldprop) {
105 proc_device_tree_add_prop(pde, newprop);
106 return;
107 }
108
104 for (ent = pde->subdir; ent != NULL; ent = ent->next) 109 for (ent = pde->subdir; ent != NULL; ent = ent->next)
105 if (ent->data == oldprop) 110 if (ent->data == oldprop)
106 break; 111 break;
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 06e1cc17caf6..fe72cd073dea 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -119,7 +119,7 @@ static struct net *get_proc_task_net(struct inode *dir)
119} 119}
120 120
121static struct dentry *proc_tgid_net_lookup(struct inode *dir, 121static struct dentry *proc_tgid_net_lookup(struct inode *dir,
122 struct dentry *dentry, struct nameidata *nd) 122 struct dentry *dentry, unsigned int flags)
123{ 123{
124 struct dentry *de; 124 struct dentry *de;
125 struct net *net; 125 struct net *net;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 3476bca8f7af..dfafeb2b05a0 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -433,7 +433,7 @@ static struct ctl_table_header *grab_header(struct inode *inode)
433} 433}
434 434
435static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, 435static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
436 struct nameidata *nd) 436 unsigned int flags)
437{ 437{
438 struct ctl_table_header *head = grab_header(dir); 438 struct ctl_table_header *head = grab_header(dir);
439 struct ctl_table_header *h = NULL; 439 struct ctl_table_header *h = NULL;
@@ -794,9 +794,9 @@ static const struct inode_operations proc_sys_dir_operations = {
794 .getattr = proc_sys_getattr, 794 .getattr = proc_sys_getattr,
795}; 795};
796 796
797static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 797static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
798{ 798{
799 if (nd->flags & LOOKUP_RCU) 799 if (flags & LOOKUP_RCU)
800 return -ECHILD; 800 return -ECHILD;
801 return !PROC_I(dentry->d_inode)->sysctl->unregistering; 801 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
802} 802}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 7c30fce037c0..9a2d9fd7cadd 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -111,7 +111,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
111 options = data; 111 options = data;
112 } 112 }
113 113
114 sb = sget(fs_type, proc_test_super, proc_set_super, ns); 114 sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns);
115 if (IS_ERR(sb)) 115 if (IS_ERR(sb))
116 return ERR_CAST(sb); 116 return ERR_CAST(sb);
117 117
@@ -121,7 +121,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
121 } 121 }
122 122
123 if (!sb->s_root) { 123 if (!sb->s_root) {
124 sb->s_flags = flags;
125 err = proc_fill_super(sb); 124 err = proc_fill_super(sb);
126 if (err) { 125 if (err) {
127 deactivate_locked_super(sb); 126 deactivate_locked_super(sb);
@@ -200,13 +199,12 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
200 return 0; 199 return 0;
201} 200}
202 201
203static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 202static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
204{ 203{
205 if (!proc_lookup(dir, dentry, nd)) { 204 if (!proc_lookup(dir, dentry, flags))
206 return NULL; 205 return NULL;
207 }
208 206
209 return proc_pid_lookup(dir, dentry, nd); 207 return proc_pid_lookup(dir, dentry, flags);
210} 208}
211 209
212static int proc_root_readdir(struct file * filp, 210static int proc_root_readdir(struct file * filp,
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 5e289a7cbad1..5fe34c355e85 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -17,7 +17,7 @@
17 17
18static unsigned mounts_poll(struct file *file, poll_table *wait) 18static unsigned mounts_poll(struct file *file, poll_table *wait)
19{ 19{
20 struct proc_mounts *p = file->private_data; 20 struct proc_mounts *p = proc_mounts(file->private_data);
21 struct mnt_namespace *ns = p->ns; 21 struct mnt_namespace *ns = p->ns;
22 unsigned res = POLLIN | POLLRDNORM; 22 unsigned res = POLLIN | POLLRDNORM;
23 23
@@ -121,7 +121,7 @@ out:
121 121
122static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) 122static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
123{ 123{
124 struct proc_mounts *p = m->private; 124 struct proc_mounts *p = proc_mounts(m);
125 struct mount *r = real_mount(mnt); 125 struct mount *r = real_mount(mnt);
126 struct super_block *sb = mnt->mnt_sb; 126 struct super_block *sb = mnt->mnt_sb;
127 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; 127 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -268,7 +268,6 @@ static int mounts_open_common(struct inode *inode, struct file *file,
268 if (ret) 268 if (ret)
269 goto err_free; 269 goto err_free;
270 270
271 p->m.private = p;
272 p->ns = ns; 271 p->ns = ns;
273 p->root = root; 272 p->root = root;
274 p->m.poll_event = ns->event; 273 p->m.poll_event = ns->event;
@@ -288,7 +287,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
288 287
289static int mounts_release(struct inode *inode, struct file *file) 288static int mounts_release(struct inode *inode, struct file *file)
290{ 289{
291 struct proc_mounts *p = file->private_data; 290 struct proc_mounts *p = proc_mounts(file->private_data);
292 path_put(&p->root); 291 path_put(&p->root);
293 put_mnt_ns(p->ns); 292 put_mnt_ns(p->ns);
294 return seq_release(inode, file); 293 return seq_release(inode, file);
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index a512c0b30e8e..d024505ba007 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -95,7 +95,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir,
95 return NULL; 95 return NULL;
96} 96}
97 97
98struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 98struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
99{ 99{
100 int ino; 100 int ino;
101 struct qnx4_inode_entry *de; 101 struct qnx4_inode_entry *de;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
index 244d4620189b..34e2d329c97e 100644
--- a/fs/qnx4/qnx4.h
+++ b/fs/qnx4/qnx4.h
@@ -23,7 +23,7 @@ struct qnx4_inode_info {
23}; 23};
24 24
25extern struct inode *qnx4_iget(struct super_block *, unsigned long); 25extern struct inode *qnx4_iget(struct super_block *, unsigned long);
26extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); 26extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags);
27extern unsigned long qnx4_count_free_blocks(struct super_block *sb); 27extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
28extern unsigned long qnx4_block_map(struct inode *inode, long iblock); 28extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
29 29
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index e44012dc5645..2049c814bda4 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -622,7 +622,6 @@ static struct inode *qnx6_alloc_inode(struct super_block *sb)
622static void qnx6_i_callback(struct rcu_head *head) 622static void qnx6_i_callback(struct rcu_head *head)
623{ 623{
624 struct inode *inode = container_of(head, struct inode, i_rcu); 624 struct inode *inode = container_of(head, struct inode, i_rcu);
625 INIT_LIST_HEAD(&inode->i_dentry);
626 kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode)); 625 kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode));
627} 626}
628 627
diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c
index 8a97289e04ad..0561326a94f5 100644
--- a/fs/qnx6/namei.c
+++ b/fs/qnx6/namei.c
@@ -13,7 +13,7 @@
13#include "qnx6.h" 13#include "qnx6.h"
14 14
15struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, 15struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
16 struct nameidata *nd) 16 unsigned int flags)
17{ 17{
18 unsigned ino; 18 unsigned ino;
19 struct page *page; 19 struct page *page;
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index 6c5e02a0b6a8..b00fcc960d37 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -45,7 +45,7 @@ struct qnx6_inode_info {
45 45
46extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino); 46extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino);
47extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, 47extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry,
48 struct nameidata *nd); 48 unsigned int flags);
49 49
50#ifdef CONFIG_QNX6FS_DEBUG 50#ifdef CONFIG_QNX6FS_DEBUG
51extern void qnx6_superblock_debug(struct qnx6_super_block *, 51extern void qnx6_superblock_debug(struct qnx6_super_block *,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 10cbe841cb7e..36a29b753c79 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -78,7 +78,7 @@
78#include <linux/quotaops.h> 78#include <linux/quotaops.h>
79#include "../internal.h" /* ugh */ 79#include "../internal.h" /* ugh */
80 80
81#include <asm/uaccess.h> 81#include <linux/uaccess.h>
82 82
83/* 83/*
84 * There are three quota SMP locks. dq_list_lock protects all lists with quotas 84 * There are three quota SMP locks. dq_list_lock protects all lists with quotas
@@ -595,12 +595,14 @@ out:
595} 595}
596EXPORT_SYMBOL(dquot_scan_active); 596EXPORT_SYMBOL(dquot_scan_active);
597 597
598int dquot_quota_sync(struct super_block *sb, int type, int wait) 598/* Write all dquot structures to quota files */
599int dquot_writeback_dquots(struct super_block *sb, int type)
599{ 600{
600 struct list_head *dirty; 601 struct list_head *dirty;
601 struct dquot *dquot; 602 struct dquot *dquot;
602 struct quota_info *dqopt = sb_dqopt(sb); 603 struct quota_info *dqopt = sb_dqopt(sb);
603 int cnt; 604 int cnt;
605 int err, ret = 0;
604 606
605 mutex_lock(&dqopt->dqonoff_mutex); 607 mutex_lock(&dqopt->dqonoff_mutex);
606 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 608 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -624,7 +626,9 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
624 atomic_inc(&dquot->dq_count); 626 atomic_inc(&dquot->dq_count);
625 spin_unlock(&dq_list_lock); 627 spin_unlock(&dq_list_lock);
626 dqstats_inc(DQST_LOOKUPS); 628 dqstats_inc(DQST_LOOKUPS);
627 sb->dq_op->write_dquot(dquot); 629 err = sb->dq_op->write_dquot(dquot);
630 if (!ret && err)
631 ret = err;
628 dqput(dquot); 632 dqput(dquot);
629 spin_lock(&dq_list_lock); 633 spin_lock(&dq_list_lock);
630 } 634 }
@@ -638,7 +642,21 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait)
638 dqstats_inc(DQST_SYNCS); 642 dqstats_inc(DQST_SYNCS);
639 mutex_unlock(&dqopt->dqonoff_mutex); 643 mutex_unlock(&dqopt->dqonoff_mutex);
640 644
641 if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE)) 645 return ret;
646}
647EXPORT_SYMBOL(dquot_writeback_dquots);
648
649/* Write all dquot structures to disk and make them visible from userspace */
650int dquot_quota_sync(struct super_block *sb, int type)
651{
652 struct quota_info *dqopt = sb_dqopt(sb);
653 int cnt;
654 int ret;
655
656 ret = dquot_writeback_dquots(sb, type);
657 if (ret)
658 return ret;
659 if (dqopt->flags & DQUOT_QUOTA_SYS_FILE)
642 return 0; 660 return 0;
643 661
644 /* This is not very clever (and fast) but currently I don't know about 662 /* This is not very clever (and fast) but currently I don't know about
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 9a391204ca27..6f155788cbc6 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -9,7 +9,7 @@
9#include <linux/namei.h> 9#include <linux/namei.h>
10#include <linux/slab.h> 10#include <linux/slab.h>
11#include <asm/current.h> 11#include <asm/current.h>
12#include <asm/uaccess.h> 12#include <linux/uaccess.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/security.h> 14#include <linux/security.h>
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
@@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd,
47static void quota_sync_one(struct super_block *sb, void *arg) 47static void quota_sync_one(struct super_block *sb, void *arg)
48{ 48{
49 if (sb->s_qcop && sb->s_qcop->quota_sync) 49 if (sb->s_qcop && sb->s_qcop->quota_sync)
50 sb->s_qcop->quota_sync(sb, *(int *)arg, 1); 50 sb->s_qcop->quota_sync(sb, *(int *)arg);
51} 51}
52 52
53static int quota_sync_all(int type) 53static int quota_sync_all(int type)
@@ -270,7 +270,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
270 case Q_SYNC: 270 case Q_SYNC:
271 if (!sb->s_qcop->quota_sync) 271 if (!sb->s_qcop->quota_sync)
272 return -ENOSYS; 272 return -ENOSYS;
273 return sb->s_qcop->quota_sync(sb, type, 1); 273 return sb->s_qcop->quota_sync(sb, type);
274 case Q_XQUOTAON: 274 case Q_XQUOTAON:
275 case Q_XQUOTAOFF: 275 case Q_XQUOTAOFF:
276 case Q_XQUOTARM: 276 case Q_XQUOTARM:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a1fdabe21dec..eab8c09d3801 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
114 return retval; 114 return retval;
115} 115}
116 116
117static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) 117static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
118{ 118{
119 return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); 119 return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
120} 120}
diff --git a/fs/read_write.c b/fs/read_write.c
index c20614f86c01..1adfb691e4f1 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -55,10 +55,11 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
55 * @file: file structure to seek on 55 * @file: file structure to seek on
56 * @offset: file offset to seek to 56 * @offset: file offset to seek to
57 * @origin: type of seek 57 * @origin: type of seek
58 * @size: max size of file system 58 * @size: max size of this file in file system
59 * @eof: offset used for SEEK_END position
59 * 60 *
60 * This is a variant of generic_file_llseek that allows passing in a custom 61 * This is a variant of generic_file_llseek that allows passing in a custom
61 * file size. 62 * maximum file size and a custom EOF position, for e.g. hashed directories
62 * 63 *
63 * Synchronization: 64 * Synchronization:
64 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 65 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
@@ -67,13 +68,13 @@ static loff_t lseek_execute(struct file *file, struct inode *inode,
67 */ 68 */
68loff_t 69loff_t
69generic_file_llseek_size(struct file *file, loff_t offset, int origin, 70generic_file_llseek_size(struct file *file, loff_t offset, int origin,
70 loff_t maxsize) 71 loff_t maxsize, loff_t eof)
71{ 72{
72 struct inode *inode = file->f_mapping->host; 73 struct inode *inode = file->f_mapping->host;
73 74
74 switch (origin) { 75 switch (origin) {
75 case SEEK_END: 76 case SEEK_END:
76 offset += i_size_read(inode); 77 offset += eof;
77 break; 78 break;
78 case SEEK_CUR: 79 case SEEK_CUR:
79 /* 80 /*
@@ -99,7 +100,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin,
99 * In the generic case the entire file is data, so as long as 100 * In the generic case the entire file is data, so as long as
100 * offset isn't at the end of the file then the offset is data. 101 * offset isn't at the end of the file then the offset is data.
101 */ 102 */
102 if (offset >= i_size_read(inode)) 103 if (offset >= eof)
103 return -ENXIO; 104 return -ENXIO;
104 break; 105 break;
105 case SEEK_HOLE: 106 case SEEK_HOLE:
@@ -107,9 +108,9 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin,
107 * There is a virtual hole at the end of the file, so as long as 108 * There is a virtual hole at the end of the file, so as long as
108 * offset isn't i_size or larger, return i_size. 109 * offset isn't i_size or larger, return i_size.
109 */ 110 */
110 if (offset >= i_size_read(inode)) 111 if (offset >= eof)
111 return -ENXIO; 112 return -ENXIO;
112 offset = i_size_read(inode); 113 offset = eof;
113 break; 114 break;
114 } 115 }
115 116
@@ -132,7 +133,8 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
132 struct inode *inode = file->f_mapping->host; 133 struct inode *inode = file->f_mapping->host;
133 134
134 return generic_file_llseek_size(file, offset, origin, 135 return generic_file_llseek_size(file, offset, origin,
135 inode->i_sb->s_maxbytes); 136 inode->i_sb->s_maxbytes,
137 i_size_read(inode));
136} 138}
137EXPORT_SYMBOL(generic_file_llseek); 139EXPORT_SYMBOL(generic_file_llseek);
138 140
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 84e8a69cee9d..8567fb847601 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -322,7 +322,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen,
322} 322}
323 323
324static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, 324static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
325 struct nameidata *nd) 325 unsigned int flags)
326{ 326{
327 int retval; 327 int retval;
328 int lock_depth; 328 int lock_depth;
@@ -573,7 +573,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
573} 573}
574 574
575static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 575static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
576 struct nameidata *nd) 576 bool excl)
577{ 577{
578 int retval; 578 int retval;
579 struct inode *inode; 579 struct inode *inode;
@@ -634,8 +634,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod
634 reiserfs_update_inode_transaction(inode); 634 reiserfs_update_inode_transaction(inode);
635 reiserfs_update_inode_transaction(dir); 635 reiserfs_update_inode_transaction(dir);
636 636
637 d_instantiate(dentry, inode);
638 unlock_new_inode(inode); 637 unlock_new_inode(inode);
638 d_instantiate(dentry, inode);
639 retval = journal_end(&th, dir->i_sb, jbegin_count); 639 retval = journal_end(&th, dir->i_sb, jbegin_count);
640 640
641 out_failed: 641 out_failed:
@@ -712,8 +712,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode
712 goto out_failed; 712 goto out_failed;
713 } 713 }
714 714
715 d_instantiate(dentry, inode);
716 unlock_new_inode(inode); 715 unlock_new_inode(inode);
716 d_instantiate(dentry, inode);
717 retval = journal_end(&th, dir->i_sb, jbegin_count); 717 retval = journal_end(&th, dir->i_sb, jbegin_count);
718 718
719 out_failed: 719 out_failed:
@@ -800,8 +800,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
800 // the above add_entry did not update dir's stat data 800 // the above add_entry did not update dir's stat data
801 reiserfs_update_sd(&th, dir); 801 reiserfs_update_sd(&th, dir);
802 802
803 d_instantiate(dentry, inode);
804 unlock_new_inode(inode); 803 unlock_new_inode(inode);
804 d_instantiate(dentry, inode);
805 retval = journal_end(&th, dir->i_sb, jbegin_count); 805 retval = journal_end(&th, dir->i_sb, jbegin_count);
806out_failed: 806out_failed:
807 reiserfs_write_unlock_once(dir->i_sb, lock_depth); 807 reiserfs_write_unlock_once(dir->i_sb, lock_depth);
@@ -1096,8 +1096,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
1096 goto out_failed; 1096 goto out_failed;
1097 } 1097 }
1098 1098
1099 d_instantiate(dentry, inode);
1100 unlock_new_inode(inode); 1099 unlock_new_inode(inode);
1100 d_instantiate(dentry, inode);
1101 retval = journal_end(&th, parent_dir->i_sb, jbegin_count); 1101 retval = journal_end(&th, parent_dir->i_sb, jbegin_count);
1102 out_failed: 1102 out_failed:
1103 reiserfs_write_unlock(parent_dir->i_sb); 1103 reiserfs_write_unlock(parent_dir->i_sb);
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 2c1ade692cc8..e60e87035bb3 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -403,7 +403,7 @@ static void *r_start(struct seq_file *m, loff_t * pos)
403 if (l) 403 if (l)
404 return NULL; 404 return NULL;
405 405
406 if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s))) 406 if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s)))
407 return NULL; 407 return NULL;
408 408
409 up_write(&s->s_umount); 409 up_write(&s->s_umount);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 651ce767b55d..7a37dabf5a96 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -68,6 +68,11 @@ static int reiserfs_sync_fs(struct super_block *s, int wait)
68{ 68{
69 struct reiserfs_transaction_handle th; 69 struct reiserfs_transaction_handle th;
70 70
71 /*
72 * Writeback quota in non-journalled quota case - journalled quota has
73 * no dirty dquots
74 */
75 dquot_writeback_dquots(s, -1);
71 reiserfs_write_lock(s); 76 reiserfs_write_lock(s);
72 if (!journal_begin(&th, s, 1)) 77 if (!journal_begin(&th, s, 1))
73 if (!journal_end_sync(&th, s, 1)) 78 if (!journal_end_sync(&th, s, 1))
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 46fc1c20a6b1..d319963aeb11 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -62,7 +62,7 @@
62static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) 62static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
63{ 63{
64 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 64 BUG_ON(!mutex_is_locked(&dir->i_mutex));
65 return dir->i_op->create(dir, dentry, mode, NULL); 65 return dir->i_op->create(dir, dentry, mode, true);
66} 66}
67#endif 67#endif
68 68
@@ -942,7 +942,7 @@ int reiserfs_permission(struct inode *inode, int mask)
942 return generic_permission(inode, mask); 942 return generic_permission(inode, mask);
943} 943}
944 944
945static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 945static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags)
946{ 946{
947 return -EPERM; 947 return -EPERM;
948} 948}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index e64f6b5f7ae5..77c5f2173983 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -210,7 +210,7 @@ out:
210 * look up an entry in a directory 210 * look up an entry in a directory
211 */ 211 */
212static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, 212static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry,
213 struct nameidata *nd) 213 unsigned int flags)
214{ 214{
215 unsigned long offset, maxoff; 215 unsigned long offset, maxoff;
216 struct inode *inode; 216 struct inode *inode;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0cbd0494b79e..14cf9de1dbe1 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -385,15 +385,12 @@ int seq_escape(struct seq_file *m, const char *s, const char *esc)
385} 385}
386EXPORT_SYMBOL(seq_escape); 386EXPORT_SYMBOL(seq_escape);
387 387
388int seq_printf(struct seq_file *m, const char *f, ...) 388int seq_vprintf(struct seq_file *m, const char *f, va_list args)
389{ 389{
390 va_list args;
391 int len; 390 int len;
392 391
393 if (m->count < m->size) { 392 if (m->count < m->size) {
394 va_start(args, f);
395 len = vsnprintf(m->buf + m->count, m->size - m->count, f, args); 393 len = vsnprintf(m->buf + m->count, m->size - m->count, f, args);
396 va_end(args);
397 if (m->count + len < m->size) { 394 if (m->count + len < m->size) {
398 m->count += len; 395 m->count += len;
399 return 0; 396 return 0;
@@ -402,6 +399,19 @@ int seq_printf(struct seq_file *m, const char *f, ...)
402 seq_set_overflow(m); 399 seq_set_overflow(m);
403 return -1; 400 return -1;
404} 401}
402EXPORT_SYMBOL(seq_vprintf);
403
404int seq_printf(struct seq_file *m, const char *f, ...)
405{
406 int ret;
407 va_list args;
408
409 va_start(args, f);
410 ret = seq_vprintf(m, f, args);
411 va_end(args);
412
413 return ret;
414}
405EXPORT_SYMBOL(seq_printf); 415EXPORT_SYMBOL(seq_printf);
406 416
407/** 417/**
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index abcc58f3c152..7834a517f7f4 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -134,7 +134,7 @@ out:
134 134
135 135
136static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, 136static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
137 struct nameidata *nd) 137 unsigned int flags)
138{ 138{
139 const unsigned char *name = dentry->d_name.name; 139 const unsigned char *name = dentry->d_name.name;
140 int len = dentry->d_name.len; 140 int len = dentry->d_name.len;
diff --git a/fs/super.c b/fs/super.c
index cf001775617f..c743fb3be4b8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -105,11 +105,12 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
105/** 105/**
106 * alloc_super - create new superblock 106 * alloc_super - create new superblock
107 * @type: filesystem type superblock should belong to 107 * @type: filesystem type superblock should belong to
108 * @flags: the mount flags
108 * 109 *
109 * Allocates and initializes a new &struct super_block. alloc_super() 110 * Allocates and initializes a new &struct super_block. alloc_super()
110 * returns a pointer to the new superblock or %NULL if allocation had failed. 111 * returns a pointer to the new superblock or %NULL if allocation had failed.
111 */ 112 */
112static struct super_block *alloc_super(struct file_system_type *type) 113static struct super_block *alloc_super(struct file_system_type *type, int flags)
113{ 114{
114 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); 115 struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER);
115 static const struct super_operations default_op; 116 static const struct super_operations default_op;
@@ -136,6 +137,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
136#else 137#else
137 INIT_LIST_HEAD(&s->s_files); 138 INIT_LIST_HEAD(&s->s_files);
138#endif 139#endif
140 s->s_flags = flags;
139 s->s_bdi = &default_backing_dev_info; 141 s->s_bdi = &default_backing_dev_info;
140 INIT_HLIST_NODE(&s->s_instances); 142 INIT_HLIST_NODE(&s->s_instances);
141 INIT_HLIST_BL_HEAD(&s->s_anon); 143 INIT_HLIST_BL_HEAD(&s->s_anon);
@@ -415,11 +417,13 @@ EXPORT_SYMBOL(generic_shutdown_super);
415 * @type: filesystem type superblock should belong to 417 * @type: filesystem type superblock should belong to
416 * @test: comparison callback 418 * @test: comparison callback
417 * @set: setup callback 419 * @set: setup callback
420 * @flags: mount flags
418 * @data: argument to each of them 421 * @data: argument to each of them
419 */ 422 */
420struct super_block *sget(struct file_system_type *type, 423struct super_block *sget(struct file_system_type *type,
421 int (*test)(struct super_block *,void *), 424 int (*test)(struct super_block *,void *),
422 int (*set)(struct super_block *,void *), 425 int (*set)(struct super_block *,void *),
426 int flags,
423 void *data) 427 void *data)
424{ 428{
425 struct super_block *s = NULL; 429 struct super_block *s = NULL;
@@ -450,7 +454,7 @@ retry:
450 } 454 }
451 if (!s) { 455 if (!s) {
452 spin_unlock(&sb_lock); 456 spin_unlock(&sb_lock);
453 s = alloc_super(type); 457 s = alloc_super(type, flags);
454 if (!s) 458 if (!s)
455 return ERR_PTR(-ENOMEM); 459 return ERR_PTR(-ENOMEM);
456 goto retry; 460 goto retry;
@@ -925,13 +929,12 @@ struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
925{ 929{
926 struct super_block *sb; 930 struct super_block *sb;
927 931
928 sb = sget(fs_type, ns_test_super, ns_set_super, data); 932 sb = sget(fs_type, ns_test_super, ns_set_super, flags, data);
929 if (IS_ERR(sb)) 933 if (IS_ERR(sb))
930 return ERR_CAST(sb); 934 return ERR_CAST(sb);
931 935
932 if (!sb->s_root) { 936 if (!sb->s_root) {
933 int err; 937 int err;
934 sb->s_flags = flags;
935 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 938 err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
936 if (err) { 939 if (err) {
937 deactivate_locked_super(sb); 940 deactivate_locked_super(sb);
@@ -992,7 +995,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
992 error = -EBUSY; 995 error = -EBUSY;
993 goto error_bdev; 996 goto error_bdev;
994 } 997 }
995 s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); 998 s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC,
999 bdev);
996 mutex_unlock(&bdev->bd_fsfreeze_mutex); 1000 mutex_unlock(&bdev->bd_fsfreeze_mutex);
997 if (IS_ERR(s)) 1001 if (IS_ERR(s))
998 goto error_s; 1002 goto error_s;
@@ -1017,7 +1021,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
1017 } else { 1021 } else {
1018 char b[BDEVNAME_SIZE]; 1022 char b[BDEVNAME_SIZE];
1019 1023
1020 s->s_flags = flags | MS_NOSEC;
1021 s->s_mode = mode; 1024 s->s_mode = mode;
1022 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 1025 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
1023 sb_set_blocksize(s, block_size(bdev)); 1026 sb_set_blocksize(s, block_size(bdev));
@@ -1062,13 +1065,11 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
1062 int (*fill_super)(struct super_block *, void *, int)) 1065 int (*fill_super)(struct super_block *, void *, int))
1063{ 1066{
1064 int error; 1067 int error;
1065 struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); 1068 struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL);
1066 1069
1067 if (IS_ERR(s)) 1070 if (IS_ERR(s))
1068 return ERR_CAST(s); 1071 return ERR_CAST(s);
1069 1072
1070 s->s_flags = flags;
1071
1072 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 1073 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
1073 if (error) { 1074 if (error) {
1074 deactivate_locked_super(s); 1075 deactivate_locked_super(s);
@@ -1091,11 +1092,10 @@ struct dentry *mount_single(struct file_system_type *fs_type,
1091 struct super_block *s; 1092 struct super_block *s;
1092 int error; 1093 int error;
1093 1094
1094 s = sget(fs_type, compare_single, set_anon_super, NULL); 1095 s = sget(fs_type, compare_single, set_anon_super, flags, NULL);
1095 if (IS_ERR(s)) 1096 if (IS_ERR(s))
1096 return ERR_CAST(s); 1097 return ERR_CAST(s);
1097 if (!s->s_root) { 1098 if (!s->s_root) {
1098 s->s_flags = flags;
1099 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 1099 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
1100 if (error) { 1100 if (error) {
1101 deactivate_locked_super(s); 1101 deactivate_locked_super(s);
diff --git a/fs/sync.c b/fs/sync.c
index 11e3d1c44901..eb8722dc556f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -29,16 +29,6 @@
29 */ 29 */
30static int __sync_filesystem(struct super_block *sb, int wait) 30static int __sync_filesystem(struct super_block *sb, int wait)
31{ 31{
32 /*
33 * This should be safe, as we require bdi backing to actually
34 * write out data in the first place
35 */
36 if (sb->s_bdi == &noop_backing_dev_info)
37 return 0;
38
39 if (sb->s_qcop && sb->s_qcop->quota_sync)
40 sb->s_qcop->quota_sync(sb, -1, wait);
41
42 if (wait) 32 if (wait)
43 sync_inodes_sb(sb); 33 sync_inodes_sb(sb);
44 else 34 else
@@ -77,29 +67,48 @@ int sync_filesystem(struct super_block *sb)
77} 67}
78EXPORT_SYMBOL_GPL(sync_filesystem); 68EXPORT_SYMBOL_GPL(sync_filesystem);
79 69
80static void sync_one_sb(struct super_block *sb, void *arg) 70static void sync_inodes_one_sb(struct super_block *sb, void *arg)
81{ 71{
82 if (!(sb->s_flags & MS_RDONLY)) 72 if (!(sb->s_flags & MS_RDONLY))
83 __sync_filesystem(sb, *(int *)arg); 73 sync_inodes_sb(sb);
84} 74}
85/* 75
86 * Sync all the data for all the filesystems (called by sys_sync() and 76static void sync_fs_one_sb(struct super_block *sb, void *arg)
87 * emergency sync)
88 */
89static void sync_filesystems(int wait)
90{ 77{
91 iterate_supers(sync_one_sb, &wait); 78 if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs)
79 sb->s_op->sync_fs(sb, *(int *)arg);
80}
81
82static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
83{
84 filemap_fdatawrite(bdev->bd_inode->i_mapping);
85}
86
87static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
88{
89 filemap_fdatawait(bdev->bd_inode->i_mapping);
92} 90}
93 91
94/* 92/*
95 * sync everything. Start out by waking pdflush, because that writes back 93 * Sync everything. We start by waking flusher threads so that most of
96 * all queues in parallel. 94 * writeback runs on all devices in parallel. Then we sync all inodes reliably
95 * which effectively also waits for all flusher threads to finish doing
96 * writeback. At this point all data is on disk so metadata should be stable
97 * and we tell filesystems to sync their metadata via ->sync_fs() calls.
98 * Finally, we writeout all block devices because some filesystems (e.g. ext2)
99 * just write metadata (such as inodes or bitmaps) to block device page cache
100 * and do not sync it on their own in ->sync_fs().
97 */ 101 */
98SYSCALL_DEFINE0(sync) 102SYSCALL_DEFINE0(sync)
99{ 103{
104 int nowait = 0, wait = 1;
105
100 wakeup_flusher_threads(0, WB_REASON_SYNC); 106 wakeup_flusher_threads(0, WB_REASON_SYNC);
101 sync_filesystems(0); 107 iterate_supers(sync_inodes_one_sb, NULL);
102 sync_filesystems(1); 108 iterate_supers(sync_fs_one_sb, &nowait);
109 iterate_supers(sync_fs_one_sb, &wait);
110 iterate_bdevs(fdatawrite_one_bdev, NULL);
111 iterate_bdevs(fdatawait_one_bdev, NULL);
103 if (unlikely(laptop_mode)) 112 if (unlikely(laptop_mode))
104 laptop_sync_completion(); 113 laptop_sync_completion();
105 return 0; 114 return 0;
@@ -107,12 +116,18 @@ SYSCALL_DEFINE0(sync)
107 116
108static void do_sync_work(struct work_struct *work) 117static void do_sync_work(struct work_struct *work)
109{ 118{
119 int nowait = 0;
120
110 /* 121 /*
111 * Sync twice to reduce the possibility we skipped some inodes / pages 122 * Sync twice to reduce the possibility we skipped some inodes / pages
112 * because they were temporarily locked 123 * because they were temporarily locked
113 */ 124 */
114 sync_filesystems(0); 125 iterate_supers(sync_inodes_one_sb, &nowait);
115 sync_filesystems(0); 126 iterate_supers(sync_fs_one_sb, &nowait);
127 iterate_bdevs(fdatawrite_one_bdev, NULL);
128 iterate_supers(sync_inodes_one_sb, &nowait);
129 iterate_supers(sync_fs_one_sb, &nowait);
130 iterate_bdevs(fdatawrite_one_bdev, NULL);
116 printk("Emergency Sync complete\n"); 131 printk("Emergency Sync complete\n");
117 kfree(work); 132 kfree(work);
118} 133}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index e6bb9b2a4cbe..a5cf784f9cc2 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -300,15 +300,15 @@ void release_sysfs_dirent(struct sysfs_dirent * sd)
300static int sysfs_dentry_delete(const struct dentry *dentry) 300static int sysfs_dentry_delete(const struct dentry *dentry)
301{ 301{
302 struct sysfs_dirent *sd = dentry->d_fsdata; 302 struct sysfs_dirent *sd = dentry->d_fsdata;
303 return !!(sd->s_flags & SYSFS_FLAG_REMOVED); 303 return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED));
304} 304}
305 305
306static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) 306static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
307{ 307{
308 struct sysfs_dirent *sd; 308 struct sysfs_dirent *sd;
309 int is_dir; 309 int is_dir;
310 310
311 if (nd->flags & LOOKUP_RCU) 311 if (flags & LOOKUP_RCU)
312 return -ECHILD; 312 return -ECHILD;
313 313
314 sd = dentry->d_fsdata; 314 sd = dentry->d_fsdata;
@@ -355,18 +355,15 @@ out_bad:
355 return 0; 355 return 0;
356} 356}
357 357
358static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode) 358static void sysfs_dentry_release(struct dentry *dentry)
359{ 359{
360 struct sysfs_dirent * sd = dentry->d_fsdata; 360 sysfs_put(dentry->d_fsdata);
361
362 sysfs_put(sd);
363 iput(inode);
364} 361}
365 362
366static const struct dentry_operations sysfs_dentry_ops = { 363const struct dentry_operations sysfs_dentry_ops = {
367 .d_revalidate = sysfs_dentry_revalidate, 364 .d_revalidate = sysfs_dentry_revalidate,
368 .d_delete = sysfs_dentry_delete, 365 .d_delete = sysfs_dentry_delete,
369 .d_iput = sysfs_dentry_iput, 366 .d_release = sysfs_dentry_release,
370}; 367};
371 368
372struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) 369struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
@@ -764,7 +761,7 @@ int sysfs_create_dir(struct kobject * kobj)
764} 761}
765 762
766static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, 763static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
767 struct nameidata *nd) 764 unsigned int flags)
768{ 765{
769 struct dentry *ret = NULL; 766 struct dentry *ret = NULL;
770 struct dentry *parent = dentry->d_parent; 767 struct dentry *parent = dentry->d_parent;
@@ -786,6 +783,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
786 ret = ERR_PTR(-ENOENT); 783 ret = ERR_PTR(-ENOENT);
787 goto out_unlock; 784 goto out_unlock;
788 } 785 }
786 dentry->d_fsdata = sysfs_get(sd);
789 787
790 /* attach dentry and inode */ 788 /* attach dentry and inode */
791 inode = sysfs_get_inode(dir->i_sb, sd); 789 inode = sysfs_get_inode(dir->i_sb, sd);
@@ -795,16 +793,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
795 } 793 }
796 794
797 /* instantiate and hash dentry */ 795 /* instantiate and hash dentry */
798 ret = d_find_alias(inode); 796 ret = d_materialise_unique(dentry, inode);
799 if (!ret) {
800 d_set_d_op(dentry, &sysfs_dentry_ops);
801 dentry->d_fsdata = sysfs_get(sd);
802 d_add(dentry, inode);
803 } else {
804 d_move(ret, dentry);
805 iput(inode);
806 }
807
808 out_unlock: 797 out_unlock:
809 mutex_unlock(&sysfs_mutex); 798 mutex_unlock(&sysfs_mutex);
810 return ret; 799 return ret;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 52c3bdb66a84..71eb7e253927 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -68,6 +68,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
68 } 68 }
69 root->d_fsdata = &sysfs_root; 69 root->d_fsdata = &sysfs_root;
70 sb->s_root = root; 70 sb->s_root = root;
71 sb->s_d_op = &sysfs_dentry_ops;
71 return 0; 72 return 0;
72} 73}
73 74
@@ -117,13 +118,12 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
117 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) 118 for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
118 info->ns[type] = kobj_ns_grab_current(type); 119 info->ns[type] = kobj_ns_grab_current(type);
119 120
120 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); 121 sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info);
121 if (IS_ERR(sb) || sb->s_fs_info != info) 122 if (IS_ERR(sb) || sb->s_fs_info != info)
122 free_sysfs_super_info(info); 123 free_sysfs_super_info(info);
123 if (IS_ERR(sb)) 124 if (IS_ERR(sb))
124 return ERR_CAST(sb); 125 return ERR_CAST(sb);
125 if (!sb->s_root) { 126 if (!sb->s_root) {
126 sb->s_flags = flags;
127 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 127 error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
128 if (error) { 128 if (error) {
129 deactivate_locked_super(sb); 129 deactivate_locked_super(sb);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 661a9639570b..d73c0932bbd6 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -157,6 +157,7 @@ extern struct kmem_cache *sysfs_dir_cachep;
157 */ 157 */
158extern struct mutex sysfs_mutex; 158extern struct mutex sysfs_mutex;
159extern spinlock_t sysfs_assoc_lock; 159extern spinlock_t sysfs_assoc_lock;
160extern const struct dentry_operations sysfs_dentry_ops;
160 161
161extern const struct file_operations sysfs_dir_operations; 162extern const struct file_operations sysfs_dir_operations;
162extern const struct inode_operations sysfs_dir_inode_operations; 163extern const struct inode_operations sysfs_dir_inode_operations;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 08d0b2568cd3..80e1e2b18df1 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -43,7 +43,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
43 * then attach current time stamp. 43 * then attach current time stamp.
44 * But if the filesystem was marked clean, keep it clean. 44 * But if the filesystem was marked clean, keep it clean.
45 */ 45 */
46 sb->s_dirt = 0;
47 old_time = fs32_to_cpu(sbi, *sbi->s_sb_time); 46 old_time = fs32_to_cpu(sbi, *sbi->s_sb_time);
48 if (sbi->s_type == FSTYPE_SYSV4) { 47 if (sbi->s_type == FSTYPE_SYSV4) {
49 if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time)) 48 if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time))
@@ -57,23 +56,12 @@ static int sysv_sync_fs(struct super_block *sb, int wait)
57 return 0; 56 return 0;
58} 57}
59 58
60static void sysv_write_super(struct super_block *sb)
61{
62 if (!(sb->s_flags & MS_RDONLY))
63 sysv_sync_fs(sb, 1);
64 else
65 sb->s_dirt = 0;
66}
67
68static int sysv_remount(struct super_block *sb, int *flags, char *data) 59static int sysv_remount(struct super_block *sb, int *flags, char *data)
69{ 60{
70 struct sysv_sb_info *sbi = SYSV_SB(sb); 61 struct sysv_sb_info *sbi = SYSV_SB(sb);
71 lock_super(sb); 62
72 if (sbi->s_forced_ro) 63 if (sbi->s_forced_ro)
73 *flags |= MS_RDONLY; 64 *flags |= MS_RDONLY;
74 if (*flags & MS_RDONLY)
75 sysv_write_super(sb);
76 unlock_super(sb);
77 return 0; 65 return 0;
78} 66}
79 67
@@ -81,9 +69,6 @@ static void sysv_put_super(struct super_block *sb)
81{ 69{
82 struct sysv_sb_info *sbi = SYSV_SB(sb); 70 struct sysv_sb_info *sbi = SYSV_SB(sb);
83 71
84 if (sb->s_dirt)
85 sysv_write_super(sb);
86
87 if (!(sb->s_flags & MS_RDONLY)) { 72 if (!(sb->s_flags & MS_RDONLY)) {
88 /* XXX ext2 also updates the state here */ 73 /* XXX ext2 also updates the state here */
89 mark_buffer_dirty(sbi->s_bh1); 74 mark_buffer_dirty(sbi->s_bh1);
@@ -357,7 +342,6 @@ const struct super_operations sysv_sops = {
357 .write_inode = sysv_write_inode, 342 .write_inode = sysv_write_inode,
358 .evict_inode = sysv_evict_inode, 343 .evict_inode = sysv_evict_inode,
359 .put_super = sysv_put_super, 344 .put_super = sysv_put_super,
360 .write_super = sysv_write_super,
361 .sync_fs = sysv_sync_fs, 345 .sync_fs = sysv_sync_fs,
362 .remount_fs = sysv_remount, 346 .remount_fs = sysv_remount,
363 .statfs = sysv_statfs, 347 .statfs = sysv_statfs,
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index d7466e293614..1c0d5f264767 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -43,7 +43,7 @@ const struct dentry_operations sysv_dentry_operations = {
43 .d_hash = sysv_hash, 43 .d_hash = sysv_hash,
44}; 44};
45 45
46static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) 46static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
47{ 47{
48 struct inode * inode = NULL; 48 struct inode * inode = NULL;
49 ino_t ino; 49 ino_t ino;
@@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode,
80 return err; 80 return err;
81} 81}
82 82
83static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd) 83static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
84{ 84{
85 return sysv_mknod(dir, dentry, mode, 0); 85 return sysv_mknod(dir, dentry, mode, 0);
86} 86}
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 11b07672f6c5..0bc35fdc58e2 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -117,7 +117,6 @@ static inline void dirty_sb(struct super_block *sb)
117 mark_buffer_dirty(sbi->s_bh1); 117 mark_buffer_dirty(sbi->s_bh1);
118 if (sbi->s_bh1 != sbi->s_bh2) 118 if (sbi->s_bh1 != sbi->s_bh2)
119 mark_buffer_dirty(sbi->s_bh2); 119 mark_buffer_dirty(sbi->s_bh2);
120 sb->s_dirt = 1;
121} 120}
122 121
123 122
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 92df3b081539..bb3167257aab 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2802,6 +2802,8 @@ static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
2802 val = d->chk_fs; 2802 val = d->chk_fs;
2803 else if (dent == d->dfs_tst_rcvry) 2803 else if (dent == d->dfs_tst_rcvry)
2804 val = d->tst_rcvry; 2804 val = d->tst_rcvry;
2805 else if (dent == d->dfs_ro_error)
2806 val = c->ro_error;
2805 else 2807 else
2806 return -EINVAL; 2808 return -EINVAL;
2807 2809
@@ -2885,6 +2887,8 @@ static ssize_t dfs_file_write(struct file *file, const char __user *u,
2885 d->chk_fs = val; 2887 d->chk_fs = val;
2886 else if (dent == d->dfs_tst_rcvry) 2888 else if (dent == d->dfs_tst_rcvry)
2887 d->tst_rcvry = val; 2889 d->tst_rcvry = val;
2890 else if (dent == d->dfs_ro_error)
2891 c->ro_error = !!val;
2888 else 2892 else
2889 return -EINVAL; 2893 return -EINVAL;
2890 2894
@@ -2996,6 +3000,13 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2996 goto out_remove; 3000 goto out_remove;
2997 d->dfs_tst_rcvry = dent; 3001 d->dfs_tst_rcvry = dent;
2998 3002
3003 fname = "ro_error";
3004 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
3005 &dfs_fops);
3006 if (IS_ERR_OR_NULL(dent))
3007 goto out_remove;
3008 d->dfs_ro_error = dent;
3009
2999 return 0; 3010 return 0;
3000 3011
3001out_remove: 3012out_remove:
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 486a8e024fb6..8b8cc4e945f4 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -79,6 +79,10 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
79 * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks 79 * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks
80 * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks 80 * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
81 * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing 81 * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
82 * @dfs_ro_error: debugfs knob to switch UBIFS to R/O mode (different to
83 * re-mounting to R/O mode because it does not flush any buffers
84 * and UBIFS just starts returning -EROFS on all write
85 * operations)
82 */ 86 */
83struct ubifs_debug_info { 87struct ubifs_debug_info {
84 struct ubifs_zbranch old_zroot; 88 struct ubifs_zbranch old_zroot;
@@ -122,6 +126,7 @@ struct ubifs_debug_info {
122 struct dentry *dfs_chk_lprops; 126 struct dentry *dfs_chk_lprops;
123 struct dentry *dfs_chk_fs; 127 struct dentry *dfs_chk_fs;
124 struct dentry *dfs_tst_rcvry; 128 struct dentry *dfs_tst_rcvry;
129 struct dentry *dfs_ro_error;
125}; 130};
126 131
127/** 132/**
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index a6d42efc76d2..c95681cf1b71 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -184,7 +184,7 @@ static int dbg_check_name(const struct ubifs_info *c,
184} 184}
185 185
186static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, 186static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
187 struct nameidata *nd) 187 unsigned int flags)
188{ 188{
189 int err; 189 int err;
190 union ubifs_key key; 190 union ubifs_key key;
@@ -246,7 +246,7 @@ out:
246} 246}
247 247
248static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, 248static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
249 struct nameidata *nd) 249 bool excl)
250{ 250{
251 struct inode *inode; 251 struct inode *inode;
252 struct ubifs_info *c = dir->i_sb->s_fs_info; 252 struct ubifs_info *c = dir->i_sb->s_fs_info;
@@ -969,7 +969,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
969 struct ubifs_budget_req ino_req = { .dirtied_ino = 1, 969 struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
970 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; 970 .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
971 struct timespec time; 971 struct timespec time;
972 unsigned int saved_nlink; 972 unsigned int uninitialized_var(saved_nlink);
973 973
974 /* 974 /*
975 * Budget request settings: deletion direntry, new direntry, removing 975 * Budget request settings: deletion direntry, new direntry, removing
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index b02734db187c..cebf17ea0458 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -176,7 +176,7 @@ int ubifs_orphan_start_commit(struct ubifs_info *c)
176 *last = orphan; 176 *last = orphan;
177 last = &orphan->cnext; 177 last = &orphan->cnext;
178 } 178 }
179 *last = orphan->cnext; 179 *last = NULL;
180 c->cmt_orphans = c->new_orphans; 180 c->cmt_orphans = c->new_orphans;
181 c->new_orphans = 0; 181 c->new_orphans = 0;
182 dbg_cmt("%d orphans to commit", c->cmt_orphans); 182 dbg_cmt("%d orphans to commit", c->cmt_orphans);
@@ -382,7 +382,7 @@ static int consolidate(struct ubifs_info *c)
382 last = &orphan->cnext; 382 last = &orphan->cnext;
383 cnt += 1; 383 cnt += 1;
384 } 384 }
385 *last = orphan->cnext; 385 *last = NULL;
386 ubifs_assert(cnt == c->tot_orphans - c->new_orphans); 386 ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
387 c->cmt_orphans = cnt; 387 c->cmt_orphans = cnt;
388 c->ohead_lnum = c->orph_first; 388 c->ohead_lnum = c->orph_first;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 3a2da7e476e5..eba46d4a7619 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -1007,7 +1007,7 @@ out:
1007 */ 1007 */
1008int ubifs_replay_journal(struct ubifs_info *c) 1008int ubifs_replay_journal(struct ubifs_info *c)
1009{ 1009{
1010 int err, i, lnum, offs, free; 1010 int err, lnum, free;
1011 1011
1012 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1012 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1013 1013
@@ -1025,25 +1025,17 @@ int ubifs_replay_journal(struct ubifs_info *c)
1025 dbg_mnt("start replaying the journal"); 1025 dbg_mnt("start replaying the journal");
1026 c->replaying = 1; 1026 c->replaying = 1;
1027 lnum = c->ltail_lnum = c->lhead_lnum; 1027 lnum = c->ltail_lnum = c->lhead_lnum;
1028 offs = c->lhead_offs;
1029 1028
1030 for (i = 0; i < c->log_lebs; i++, lnum++) { 1029 lnum = UBIFS_LOG_LNUM;
1031 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { 1030 do {
1032 /* 1031 err = replay_log_leb(c, lnum, 0, c->sbuf);
1033 * The log is logically circular, we reached the last
1034 * LEB, switch to the first one.
1035 */
1036 lnum = UBIFS_LOG_LNUM;
1037 offs = 0;
1038 }
1039 err = replay_log_leb(c, lnum, offs, c->sbuf);
1040 if (err == 1) 1032 if (err == 1)
1041 /* We hit the end of the log */ 1033 /* We hit the end of the log */
1042 break; 1034 break;
1043 if (err) 1035 if (err)
1044 goto out; 1036 goto out;
1045 offs = 0; 1037 lnum = ubifs_next_log_lnum(c, lnum);
1046 } 1038 } while (lnum != UBIFS_LOG_LNUM);
1047 1039
1048 err = replay_buds(c); 1040 err = replay_buds(c);
1049 if (err) 1041 if (err)
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index ef3d1ba6d992..15e2fc5aa60b 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -718,8 +718,12 @@ static int fixup_free_space(struct ubifs_info *c)
718 lnum = ubifs_next_log_lnum(c, lnum); 718 lnum = ubifs_next_log_lnum(c, lnum);
719 } 719 }
720 720
721 /* Fixup the current log head */ 721 /*
722 err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); 722 * Fixup the log head which contains the only a CS node at the
723 * beginning.
724 */
725 err = fixup_leb(c, c->lhead_lnum,
726 ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size));
723 if (err) 727 if (err)
724 goto out; 728 goto out;
725 729
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 5862dd9d2784..1c766c39c038 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2136,7 +2136,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2136 2136
2137 dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); 2137 dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2138 2138
2139 sb = sget(fs_type, sb_test, sb_set, c); 2139 sb = sget(fs_type, sb_test, sb_set, flags, c);
2140 if (IS_ERR(sb)) { 2140 if (IS_ERR(sb)) {
2141 err = PTR_ERR(sb); 2141 err = PTR_ERR(sb);
2142 kfree(c); 2142 kfree(c);
@@ -2153,7 +2153,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2153 goto out_deact; 2153 goto out_deact;
2154 } 2154 }
2155 } else { 2155 } else {
2156 sb->s_flags = flags;
2157 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 2156 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
2158 if (err) 2157 if (err)
2159 goto out_deact; 2158 goto out_deact;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 873e1bab9c4c..fafaad795cd6 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1247,7 +1247,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1247{ 1247{
1248 struct fileEntry *fe; 1248 struct fileEntry *fe;
1249 struct extendedFileEntry *efe; 1249 struct extendedFileEntry *efe;
1250 int offset;
1251 struct udf_sb_info *sbi = UDF_SB(inode->i_sb); 1250 struct udf_sb_info *sbi = UDF_SB(inode->i_sb);
1252 struct udf_inode_info *iinfo = UDF_I(inode); 1251 struct udf_inode_info *iinfo = UDF_I(inode);
1253 unsigned int link_count; 1252 unsigned int link_count;
@@ -1359,7 +1358,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1359 iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); 1358 iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
1360 iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs); 1359 iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs);
1361 iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint); 1360 iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint);
1362 offset = sizeof(struct fileEntry) + iinfo->i_lenEAttr;
1363 } else { 1361 } else {
1364 inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << 1362 inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) <<
1365 (inode->i_sb->s_blocksize_bits - 9); 1363 (inode->i_sb->s_blocksize_bits - 9);
@@ -1381,8 +1379,6 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh)
1381 iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); 1379 iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
1382 iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); 1380 iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs);
1383 iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); 1381 iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint);
1384 offset = sizeof(struct extendedFileEntry) +
1385 iinfo->i_lenEAttr;
1386 } 1382 }
1387 1383
1388 switch (fe->icbTag.fileType) { 1384 switch (fe->icbTag.fileType) {
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 18024178ac4c..95fee278ab9d 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -251,7 +251,7 @@ out_ok:
251} 251}
252 252
253static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, 253static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
254 struct nameidata *nd) 254 unsigned int flags)
255{ 255{
256 struct inode *inode = NULL; 256 struct inode *inode = NULL;
257 struct fileIdentDesc cfi; 257 struct fileIdentDesc cfi;
@@ -551,7 +551,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
551} 551}
552 552
553static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, 553static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
554 struct nameidata *nd) 554 bool excl)
555{ 555{
556 struct udf_fileident_bh fibh; 556 struct udf_fileident_bh fibh;
557 struct inode *inode; 557 struct inode *inode;
@@ -1279,6 +1279,7 @@ static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
1279 *lenp = 3; 1279 *lenp = 3;
1280 fid->udf.block = location.logicalBlockNum; 1280 fid->udf.block = location.logicalBlockNum;
1281 fid->udf.partref = location.partitionReferenceNum; 1281 fid->udf.partref = location.partitionReferenceNum;
1282 fid->udf.parent_partref = 0;
1282 fid->udf.generation = inode->i_generation; 1283 fid->udf.generation = inode->i_generation;
1283 1284
1284 if (parent) { 1285 if (parent) {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 8d86a8706c0e..dcbf98722afc 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -252,6 +252,63 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
252 return 0; 252 return 0;
253} 253}
254 254
255static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
256{
257 int i;
258 int nr_groups = bitmap->s_nr_groups;
259 int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
260 nr_groups);
261
262 for (i = 0; i < nr_groups; i++)
263 if (bitmap->s_block_bitmap[i])
264 brelse(bitmap->s_block_bitmap[i]);
265
266 if (size <= PAGE_SIZE)
267 kfree(bitmap);
268 else
269 vfree(bitmap);
270}
271
272static void udf_free_partition(struct udf_part_map *map)
273{
274 int i;
275 struct udf_meta_data *mdata;
276
277 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
278 iput(map->s_uspace.s_table);
279 if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
280 iput(map->s_fspace.s_table);
281 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
282 udf_sb_free_bitmap(map->s_uspace.s_bitmap);
283 if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
284 udf_sb_free_bitmap(map->s_fspace.s_bitmap);
285 if (map->s_partition_type == UDF_SPARABLE_MAP15)
286 for (i = 0; i < 4; i++)
287 brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
288 else if (map->s_partition_type == UDF_METADATA_MAP25) {
289 mdata = &map->s_type_specific.s_metadata;
290 iput(mdata->s_metadata_fe);
291 mdata->s_metadata_fe = NULL;
292
293 iput(mdata->s_mirror_fe);
294 mdata->s_mirror_fe = NULL;
295
296 iput(mdata->s_bitmap_fe);
297 mdata->s_bitmap_fe = NULL;
298 }
299}
300
301static void udf_sb_free_partitions(struct super_block *sb)
302{
303 struct udf_sb_info *sbi = UDF_SB(sb);
304 int i;
305
306 for (i = 0; i < sbi->s_partitions; i++)
307 udf_free_partition(&sbi->s_partmaps[i]);
308 kfree(sbi->s_partmaps);
309 sbi->s_partmaps = NULL;
310}
311
255static int udf_show_options(struct seq_file *seq, struct dentry *root) 312static int udf_show_options(struct seq_file *seq, struct dentry *root)
256{ 313{
257 struct super_block *sb = root->d_sb; 314 struct super_block *sb = root->d_sb;
@@ -1283,7 +1340,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
1283 BUG_ON(ident != TAG_IDENT_LVD); 1340 BUG_ON(ident != TAG_IDENT_LVD);
1284 lvd = (struct logicalVolDesc *)bh->b_data; 1341 lvd = (struct logicalVolDesc *)bh->b_data;
1285 table_len = le32_to_cpu(lvd->mapTableLength); 1342 table_len = le32_to_cpu(lvd->mapTableLength);
1286 if (sizeof(*lvd) + table_len > sb->s_blocksize) { 1343 if (table_len > sb->s_blocksize - sizeof(*lvd)) {
1287 udf_err(sb, "error loading logical volume descriptor: " 1344 udf_err(sb, "error loading logical volume descriptor: "
1288 "Partition table too long (%u > %lu)\n", table_len, 1345 "Partition table too long (%u > %lu)\n", table_len,
1289 sb->s_blocksize - sizeof(*lvd)); 1346 sb->s_blocksize - sizeof(*lvd));
@@ -1596,7 +1653,11 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
1596 /* responsible for finding the PartitionDesc(s) */ 1653 /* responsible for finding the PartitionDesc(s) */
1597 if (!udf_process_sequence(sb, main_s, main_e, fileset)) 1654 if (!udf_process_sequence(sb, main_s, main_e, fileset))
1598 return 1; 1655 return 1;
1599 return !udf_process_sequence(sb, reserve_s, reserve_e, fileset); 1656 udf_sb_free_partitions(sb);
1657 if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset))
1658 return 1;
1659 udf_sb_free_partitions(sb);
1660 return 0;
1600} 1661}
1601 1662
1602/* 1663/*
@@ -1861,55 +1922,8 @@ u64 lvid_get_unique_id(struct super_block *sb)
1861 return ret; 1922 return ret;
1862} 1923}
1863 1924
1864static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
1865{
1866 int i;
1867 int nr_groups = bitmap->s_nr_groups;
1868 int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
1869 nr_groups);
1870
1871 for (i = 0; i < nr_groups; i++)
1872 if (bitmap->s_block_bitmap[i])
1873 brelse(bitmap->s_block_bitmap[i]);
1874
1875 if (size <= PAGE_SIZE)
1876 kfree(bitmap);
1877 else
1878 vfree(bitmap);
1879}
1880
1881static void udf_free_partition(struct udf_part_map *map)
1882{
1883 int i;
1884 struct udf_meta_data *mdata;
1885
1886 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_TABLE)
1887 iput(map->s_uspace.s_table);
1888 if (map->s_partition_flags & UDF_PART_FLAG_FREED_TABLE)
1889 iput(map->s_fspace.s_table);
1890 if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP)
1891 udf_sb_free_bitmap(map->s_uspace.s_bitmap);
1892 if (map->s_partition_flags & UDF_PART_FLAG_FREED_BITMAP)
1893 udf_sb_free_bitmap(map->s_fspace.s_bitmap);
1894 if (map->s_partition_type == UDF_SPARABLE_MAP15)
1895 for (i = 0; i < 4; i++)
1896 brelse(map->s_type_specific.s_sparing.s_spar_map[i]);
1897 else if (map->s_partition_type == UDF_METADATA_MAP25) {
1898 mdata = &map->s_type_specific.s_metadata;
1899 iput(mdata->s_metadata_fe);
1900 mdata->s_metadata_fe = NULL;
1901
1902 iput(mdata->s_mirror_fe);
1903 mdata->s_mirror_fe = NULL;
1904
1905 iput(mdata->s_bitmap_fe);
1906 mdata->s_bitmap_fe = NULL;
1907 }
1908}
1909
1910static int udf_fill_super(struct super_block *sb, void *options, int silent) 1925static int udf_fill_super(struct super_block *sb, void *options, int silent)
1911{ 1926{
1912 int i;
1913 int ret; 1927 int ret;
1914 struct inode *inode = NULL; 1928 struct inode *inode = NULL;
1915 struct udf_options uopt; 1929 struct udf_options uopt;
@@ -1974,7 +1988,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
1974 sb->s_op = &udf_sb_ops; 1988 sb->s_op = &udf_sb_ops;
1975 sb->s_export_op = &udf_export_ops; 1989 sb->s_export_op = &udf_export_ops;
1976 1990
1977 sb->s_dirt = 0;
1978 sb->s_magic = UDF_SUPER_MAGIC; 1991 sb->s_magic = UDF_SUPER_MAGIC;
1979 sb->s_time_gran = 1000; 1992 sb->s_time_gran = 1000;
1980 1993
@@ -2072,9 +2085,6 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
2072error_out: 2085error_out:
2073 if (sbi->s_vat_inode) 2086 if (sbi->s_vat_inode)
2074 iput(sbi->s_vat_inode); 2087 iput(sbi->s_vat_inode);
2075 if (sbi->s_partitions)
2076 for (i = 0; i < sbi->s_partitions; i++)
2077 udf_free_partition(&sbi->s_partmaps[i]);
2078#ifdef CONFIG_UDF_NLS 2088#ifdef CONFIG_UDF_NLS
2079 if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) 2089 if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
2080 unload_nls(sbi->s_nls_map); 2090 unload_nls(sbi->s_nls_map);
@@ -2082,8 +2092,7 @@ error_out:
2082 if (!(sb->s_flags & MS_RDONLY)) 2092 if (!(sb->s_flags & MS_RDONLY))
2083 udf_close_lvid(sb); 2093 udf_close_lvid(sb);
2084 brelse(sbi->s_lvid_bh); 2094 brelse(sbi->s_lvid_bh);
2085 2095 udf_sb_free_partitions(sb);
2086 kfree(sbi->s_partmaps);
2087 kfree(sbi); 2096 kfree(sbi);
2088 sb->s_fs_info = NULL; 2097 sb->s_fs_info = NULL;
2089 2098
@@ -2096,10 +2105,6 @@ void _udf_err(struct super_block *sb, const char *function,
2096 struct va_format vaf; 2105 struct va_format vaf;
2097 va_list args; 2106 va_list args;
2098 2107
2099 /* mark sb error */
2100 if (!(sb->s_flags & MS_RDONLY))
2101 sb->s_dirt = 1;
2102
2103 va_start(args, fmt); 2108 va_start(args, fmt);
2104 2109
2105 vaf.fmt = fmt; 2110 vaf.fmt = fmt;
@@ -2128,16 +2133,12 @@ void _udf_warn(struct super_block *sb, const char *function,
2128 2133
2129static void udf_put_super(struct super_block *sb) 2134static void udf_put_super(struct super_block *sb)
2130{ 2135{
2131 int i;
2132 struct udf_sb_info *sbi; 2136 struct udf_sb_info *sbi;
2133 2137
2134 sbi = UDF_SB(sb); 2138 sbi = UDF_SB(sb);
2135 2139
2136 if (sbi->s_vat_inode) 2140 if (sbi->s_vat_inode)
2137 iput(sbi->s_vat_inode); 2141 iput(sbi->s_vat_inode);
2138 if (sbi->s_partitions)
2139 for (i = 0; i < sbi->s_partitions; i++)
2140 udf_free_partition(&sbi->s_partmaps[i]);
2141#ifdef CONFIG_UDF_NLS 2142#ifdef CONFIG_UDF_NLS
2142 if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) 2143 if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
2143 unload_nls(sbi->s_nls_map); 2144 unload_nls(sbi->s_nls_map);
@@ -2145,7 +2146,7 @@ static void udf_put_super(struct super_block *sb)
2145 if (!(sb->s_flags & MS_RDONLY)) 2146 if (!(sb->s_flags & MS_RDONLY))
2146 udf_close_lvid(sb); 2147 udf_close_lvid(sb);
2147 brelse(sbi->s_lvid_bh); 2148 brelse(sbi->s_lvid_bh);
2148 kfree(sbi->s_partmaps); 2149 udf_sb_free_partitions(sb);
2149 kfree(sb->s_fs_info); 2150 kfree(sb->s_fs_info);
2150 sb->s_fs_info = NULL; 2151 sb->s_fs_info = NULL;
2151} 2152}
@@ -2161,7 +2162,6 @@ static int udf_sync_fs(struct super_block *sb, int wait)
2161 * the buffer for IO 2162 * the buffer for IO
2162 */ 2163 */
2163 mark_buffer_dirty(sbi->s_lvid_bh); 2164 mark_buffer_dirty(sbi->s_lvid_bh);
2164 sb->s_dirt = 0;
2165 sbi->s_lvid_dirty = 0; 2165 sbi->s_lvid_dirty = 0;
2166 } 2166 }
2167 mutex_unlock(&sbi->s_alloc_mutex); 2167 mutex_unlock(&sbi->s_alloc_mutex);
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 4b98fee8e161..8a9657d7f7c6 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -248,7 +248,7 @@ void udf_truncate_extents(struct inode *inode)
248 /* We managed to free all extents in the 248 /* We managed to free all extents in the
249 * indirect extent - free it too */ 249 * indirect extent - free it too */
250 BUG_ON(!epos.bh); 250 BUG_ON(!epos.bh);
251 udf_free_blocks(sb, inode, &epos.block, 251 udf_free_blocks(sb, NULL, &epos.block,
252 0, indirect_ext_len); 252 0, indirect_ext_len);
253 } else if (!epos.bh) { 253 } else if (!epos.bh) {
254 iinfo->i_lenAlloc = lenalloc; 254 iinfo->i_lenAlloc = lenalloc;
@@ -275,7 +275,7 @@ void udf_truncate_extents(struct inode *inode)
275 275
276 if (indirect_ext_len) { 276 if (indirect_ext_len) {
277 BUG_ON(!epos.bh); 277 BUG_ON(!epos.bh);
278 udf_free_blocks(sb, inode, &epos.block, 0, indirect_ext_len); 278 udf_free_blocks(sb, NULL, &epos.block, 0, indirect_ext_len);
279 } else if (!epos.bh) { 279 } else if (!epos.bh) {
280 iinfo->i_lenAlloc = lenalloc; 280 iinfo->i_lenAlloc = lenalloc;
281 mark_inode_dirty(inode); 281 mark_inode_dirty(inode);
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index ebe10314e512..de038da6f6bd 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -129,7 +129,6 @@ static inline void udf_updated_lvid(struct super_block *sb)
129 WARN_ON_ONCE(((struct logicalVolIntegrityDesc *) 129 WARN_ON_ONCE(((struct logicalVolIntegrityDesc *)
130 bh->b_data)->integrityType != 130 bh->b_data)->integrityType !=
131 cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN)); 131 cpu_to_le32(LVID_INTEGRITY_TYPE_OPEN));
132 sb->s_dirt = 1;
133 UDF_SB(sb)->s_lvid_dirty = 1; 132 UDF_SB(sb)->s_lvid_dirty = 1;
134} 133}
135extern u64 lvid_get_unique_id(struct super_block *sb); 134extern u64 lvid_get_unique_id(struct super_block *sb);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 42694e11c23d..1b3e410bf334 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -116,7 +116,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
116 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 116 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
117 if (sb->s_flags & MS_SYNCHRONOUS) 117 if (sb->s_flags & MS_SYNCHRONOUS)
118 ubh_sync_block(UCPI_UBH(ucpi)); 118 ubh_sync_block(UCPI_UBH(ucpi));
119 sb->s_dirt = 1; 119 ufs_mark_sb_dirty(sb);
120 120
121 unlock_super (sb); 121 unlock_super (sb);
122 UFSD("EXIT\n"); 122 UFSD("EXIT\n");
@@ -214,7 +214,7 @@ do_more:
214 goto do_more; 214 goto do_more;
215 } 215 }
216 216
217 sb->s_dirt = 1; 217 ufs_mark_sb_dirty(sb);
218 unlock_super (sb); 218 unlock_super (sb);
219 UFSD("EXIT\n"); 219 UFSD("EXIT\n");
220 return; 220 return;
@@ -557,7 +557,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
557 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 557 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
558 if (sb->s_flags & MS_SYNCHRONOUS) 558 if (sb->s_flags & MS_SYNCHRONOUS)
559 ubh_sync_block(UCPI_UBH(ucpi)); 559 ubh_sync_block(UCPI_UBH(ucpi));
560 sb->s_dirt = 1; 560 ufs_mark_sb_dirty(sb);
561 561
562 UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); 562 UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
563 563
@@ -677,7 +677,7 @@ succed:
677 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 677 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
678 if (sb->s_flags & MS_SYNCHRONOUS) 678 if (sb->s_flags & MS_SYNCHRONOUS)
679 ubh_sync_block(UCPI_UBH(ucpi)); 679 ubh_sync_block(UCPI_UBH(ucpi));
680 sb->s_dirt = 1; 680 ufs_mark_sb_dirty(sb);
681 681
682 result += cgno * uspi->s_fpg; 682 result += cgno * uspi->s_fpg;
683 UFSD("EXIT3, result %llu\n", (unsigned long long)result); 683 UFSD("EXIT3, result %llu\n", (unsigned long long)result);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 4ec5c1085a87..e84cbe21b986 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -116,7 +116,7 @@ void ufs_free_inode (struct inode * inode)
116 if (sb->s_flags & MS_SYNCHRONOUS) 116 if (sb->s_flags & MS_SYNCHRONOUS)
117 ubh_sync_block(UCPI_UBH(ucpi)); 117 ubh_sync_block(UCPI_UBH(ucpi));
118 118
119 sb->s_dirt = 1; 119 ufs_mark_sb_dirty(sb);
120 unlock_super (sb); 120 unlock_super (sb);
121 UFSD("EXIT\n"); 121 UFSD("EXIT\n");
122} 122}
@@ -288,7 +288,7 @@ cg_found:
288 ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); 288 ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
289 if (sb->s_flags & MS_SYNCHRONOUS) 289 if (sb->s_flags & MS_SYNCHRONOUS)
290 ubh_sync_block(UCPI_UBH(ucpi)); 290 ubh_sync_block(UCPI_UBH(ucpi));
291 sb->s_dirt = 1; 291 ufs_mark_sb_dirty(sb);
292 292
293 inode->i_ino = cg * uspi->s_ipg + bit; 293 inode->i_ino = cg * uspi->s_ipg + bit;
294 inode_init_owner(inode, dir, mode); 294 inode_init_owner(inode, dir, mode);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index a2281cadefa1..90d74b8f8eba 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -46,7 +46,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode)
46 return err; 46 return err;
47} 47}
48 48
49static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 49static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags)
50{ 50{
51 struct inode * inode = NULL; 51 struct inode * inode = NULL;
52 ino_t ino; 52 ino_t ino;
@@ -71,7 +71,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
71 * with d_instantiate(). 71 * with d_instantiate().
72 */ 72 */
73static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode, 73static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
74 struct nameidata *nd) 74 bool excl)
75{ 75{
76 struct inode *inode; 76 struct inode *inode;
77 int err; 77 int err;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 302f340d0071..444927e5706b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -302,7 +302,7 @@ void ufs_error (struct super_block * sb, const char * function,
302 if (!(sb->s_flags & MS_RDONLY)) { 302 if (!(sb->s_flags & MS_RDONLY)) {
303 usb1->fs_clean = UFS_FSBAD; 303 usb1->fs_clean = UFS_FSBAD;
304 ubh_mark_buffer_dirty(USPI_UBH(uspi)); 304 ubh_mark_buffer_dirty(USPI_UBH(uspi));
305 sb->s_dirt = 1; 305 ufs_mark_sb_dirty(sb);
306 sb->s_flags |= MS_RDONLY; 306 sb->s_flags |= MS_RDONLY;
307 } 307 }
308 va_start (args, fmt); 308 va_start (args, fmt);
@@ -334,7 +334,7 @@ void ufs_panic (struct super_block * sb, const char * function,
334 if (!(sb->s_flags & MS_RDONLY)) { 334 if (!(sb->s_flags & MS_RDONLY)) {
335 usb1->fs_clean = UFS_FSBAD; 335 usb1->fs_clean = UFS_FSBAD;
336 ubh_mark_buffer_dirty(USPI_UBH(uspi)); 336 ubh_mark_buffer_dirty(USPI_UBH(uspi));
337 sb->s_dirt = 1; 337 ufs_mark_sb_dirty(sb);
338 } 338 }
339 va_start (args, fmt); 339 va_start (args, fmt);
340 vsnprintf (error_buf, sizeof(error_buf), fmt, args); 340 vsnprintf (error_buf, sizeof(error_buf), fmt, args);
@@ -691,6 +691,83 @@ static void ufs_put_super_internal(struct super_block *sb)
691 UFSD("EXIT\n"); 691 UFSD("EXIT\n");
692} 692}
693 693
694static int ufs_sync_fs(struct super_block *sb, int wait)
695{
696 struct ufs_sb_private_info * uspi;
697 struct ufs_super_block_first * usb1;
698 struct ufs_super_block_third * usb3;
699 unsigned flags;
700
701 lock_ufs(sb);
702 lock_super(sb);
703
704 UFSD("ENTER\n");
705
706 flags = UFS_SB(sb)->s_flags;
707 uspi = UFS_SB(sb)->s_uspi;
708 usb1 = ubh_get_usb_first(uspi);
709 usb3 = ubh_get_usb_third(uspi);
710
711 usb1->fs_time = cpu_to_fs32(sb, get_seconds());
712 if ((flags & UFS_ST_MASK) == UFS_ST_SUN ||
713 (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
714 (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
715 ufs_set_fs_state(sb, usb1, usb3,
716 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
717 ufs_put_cstotal(sb);
718
719 UFSD("EXIT\n");
720 unlock_super(sb);
721 unlock_ufs(sb);
722
723 return 0;
724}
725
726static void delayed_sync_fs(struct work_struct *work)
727{
728 struct ufs_sb_info *sbi;
729
730 sbi = container_of(work, struct ufs_sb_info, sync_work.work);
731
732 spin_lock(&sbi->work_lock);
733 sbi->work_queued = 0;
734 spin_unlock(&sbi->work_lock);
735
736 ufs_sync_fs(sbi->sb, 1);
737}
738
739void ufs_mark_sb_dirty(struct super_block *sb)
740{
741 struct ufs_sb_info *sbi = UFS_SB(sb);
742 unsigned long delay;
743
744 spin_lock(&sbi->work_lock);
745 if (!sbi->work_queued) {
746 delay = msecs_to_jiffies(dirty_writeback_interval * 10);
747 queue_delayed_work(system_long_wq, &sbi->sync_work, delay);
748 sbi->work_queued = 1;
749 }
750 spin_unlock(&sbi->work_lock);
751}
752
753static void ufs_put_super(struct super_block *sb)
754{
755 struct ufs_sb_info * sbi = UFS_SB(sb);
756
757 UFSD("ENTER\n");
758
759 if (!(sb->s_flags & MS_RDONLY))
760 ufs_put_super_internal(sb);
761 cancel_delayed_work_sync(&sbi->sync_work);
762
763 ubh_brelse_uspi (sbi->s_uspi);
764 kfree (sbi->s_uspi);
765 kfree (sbi);
766 sb->s_fs_info = NULL;
767 UFSD("EXIT\n");
768 return;
769}
770
694static int ufs_fill_super(struct super_block *sb, void *data, int silent) 771static int ufs_fill_super(struct super_block *sb, void *data, int silent)
695{ 772{
696 struct ufs_sb_info * sbi; 773 struct ufs_sb_info * sbi;
@@ -716,6 +793,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
716 if (!sbi) 793 if (!sbi)
717 goto failed_nomem; 794 goto failed_nomem;
718 sb->s_fs_info = sbi; 795 sb->s_fs_info = sbi;
796 sbi->sb = sb;
719 797
720 UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); 798 UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY));
721 799
@@ -727,6 +805,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
727 } 805 }
728#endif 806#endif
729 mutex_init(&sbi->mutex); 807 mutex_init(&sbi->mutex);
808 spin_lock_init(&sbi->work_lock);
809 INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
730 /* 810 /*
731 * Set default mount options 811 * Set default mount options
732 * Parse mount options 812 * Parse mount options
@@ -1191,68 +1271,6 @@ failed_nomem:
1191 return -ENOMEM; 1271 return -ENOMEM;
1192} 1272}
1193 1273
1194static int ufs_sync_fs(struct super_block *sb, int wait)
1195{
1196 struct ufs_sb_private_info * uspi;
1197 struct ufs_super_block_first * usb1;
1198 struct ufs_super_block_third * usb3;
1199 unsigned flags;
1200
1201 lock_ufs(sb);
1202 lock_super(sb);
1203
1204 UFSD("ENTER\n");
1205
1206 flags = UFS_SB(sb)->s_flags;
1207 uspi = UFS_SB(sb)->s_uspi;
1208 usb1 = ubh_get_usb_first(uspi);
1209 usb3 = ubh_get_usb_third(uspi);
1210
1211 usb1->fs_time = cpu_to_fs32(sb, get_seconds());
1212 if ((flags & UFS_ST_MASK) == UFS_ST_SUN ||
1213 (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
1214 (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
1215 ufs_set_fs_state(sb, usb1, usb3,
1216 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
1217 ufs_put_cstotal(sb);
1218 sb->s_dirt = 0;
1219
1220 UFSD("EXIT\n");
1221 unlock_super(sb);
1222 unlock_ufs(sb);
1223
1224 return 0;
1225}
1226
1227static void ufs_write_super(struct super_block *sb)
1228{
1229 if (!(sb->s_flags & MS_RDONLY))
1230 ufs_sync_fs(sb, 1);
1231 else
1232 sb->s_dirt = 0;
1233}
1234
1235static void ufs_put_super(struct super_block *sb)
1236{
1237 struct ufs_sb_info * sbi = UFS_SB(sb);
1238
1239 UFSD("ENTER\n");
1240
1241 if (sb->s_dirt)
1242 ufs_write_super(sb);
1243
1244 if (!(sb->s_flags & MS_RDONLY))
1245 ufs_put_super_internal(sb);
1246
1247 ubh_brelse_uspi (sbi->s_uspi);
1248 kfree (sbi->s_uspi);
1249 kfree (sbi);
1250 sb->s_fs_info = NULL;
1251 UFSD("EXIT\n");
1252 return;
1253}
1254
1255
1256static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) 1274static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1257{ 1275{
1258 struct ufs_sb_private_info * uspi; 1276 struct ufs_sb_private_info * uspi;
@@ -1308,7 +1326,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1308 ufs_set_fs_state(sb, usb1, usb3, 1326 ufs_set_fs_state(sb, usb1, usb3,
1309 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); 1327 UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
1310 ubh_mark_buffer_dirty (USPI_UBH(uspi)); 1328 ubh_mark_buffer_dirty (USPI_UBH(uspi));
1311 sb->s_dirt = 0;
1312 sb->s_flags |= MS_RDONLY; 1329 sb->s_flags |= MS_RDONLY;
1313 } else { 1330 } else {
1314 /* 1331 /*
@@ -1458,7 +1475,6 @@ static const struct super_operations ufs_super_ops = {
1458 .write_inode = ufs_write_inode, 1475 .write_inode = ufs_write_inode,
1459 .evict_inode = ufs_evict_inode, 1476 .evict_inode = ufs_evict_inode,
1460 .put_super = ufs_put_super, 1477 .put_super = ufs_put_super,
1461 .write_super = ufs_write_super,
1462 .sync_fs = ufs_sync_fs, 1478 .sync_fs = ufs_sync_fs,
1463 .statfs = ufs_statfs, 1479 .statfs = ufs_statfs,
1464 .remount_fs = ufs_remount, 1480 .remount_fs = ufs_remount,
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index 528750b7e701..343e6fc571e5 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -20,6 +20,10 @@ struct ufs_sb_info {
20 unsigned s_mount_opt; 20 unsigned s_mount_opt;
21 struct mutex mutex; 21 struct mutex mutex;
22 struct task_struct *mutex_owner; 22 struct task_struct *mutex_owner;
23 struct super_block *sb;
24 int work_queued; /* non-zero if the delayed work is queued */
25 struct delayed_work sync_work; /* FS sync delayed work */
26 spinlock_t work_lock; /* protects sync_work and work_queued */
23}; 27};
24 28
25struct ufs_inode_info { 29struct ufs_inode_info {
@@ -123,6 +127,7 @@ extern __printf(3, 4)
123void ufs_error(struct super_block *, const char *, const char *, ...); 127void ufs_error(struct super_block *, const char *, const char *, ...);
124extern __printf(3, 4) 128extern __printf(3, 4)
125void ufs_panic(struct super_block *, const char *, const char *, ...); 129void ufs_panic(struct super_block *, const char *, const char *, ...);
130void ufs_mark_sb_dirty(struct super_block *sb);
126 131
127/* symlink.c */ 132/* symlink.c */
128extern const struct inode_operations ufs_fast_symlink_inode_operations; 133extern const struct inode_operations ufs_fast_symlink_inode_operations;
diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h
index 8aba544f9fad..0cbd5d340b67 100644
--- a/fs/ufs/ufs_fs.h
+++ b/fs/ufs/ufs_fs.h
@@ -34,6 +34,7 @@
34#include <linux/kernel.h> 34#include <linux/kernel.h>
35#include <linux/stat.h> 35#include <linux/stat.h>
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/workqueue.h>
37 38
38#include <asm/div64.h> 39#include <asm/div64.h>
39typedef __u64 __bitwise __fs64; 40typedef __u64 __bitwise __fs64;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9d1aeb7e2734..4f33c32affe3 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1074,13 +1074,13 @@ restart:
1074 * If we couldn't get anything, give up. 1074 * If we couldn't get anything, give up.
1075 */ 1075 */
1076 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1076 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1077 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1078
1077 if (!forced++) { 1079 if (!forced++) {
1078 trace_xfs_alloc_near_busy(args); 1080 trace_xfs_alloc_near_busy(args);
1079 xfs_log_force(args->mp, XFS_LOG_SYNC); 1081 xfs_log_force(args->mp, XFS_LOG_SYNC);
1080 goto restart; 1082 goto restart;
1081 } 1083 }
1082
1083 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1084 trace_xfs_alloc_size_neither(args); 1084 trace_xfs_alloc_size_neither(args);
1085 args->agbno = NULLAGBLOCK; 1085 args->agbno = NULLAGBLOCK;
1086 return 0; 1086 return 0;
@@ -2434,13 +2434,22 @@ xfs_alloc_vextent_worker(
2434 current_restore_flags_nested(&pflags, PF_FSTRANS); 2434 current_restore_flags_nested(&pflags, PF_FSTRANS);
2435} 2435}
2436 2436
2437 2437/*
2438int /* error */ 2438 * Data allocation requests often come in with little stack to work on. Push
2439 * them off to a worker thread so there is lots of stack to use. Metadata
2440 * requests, OTOH, are generally from low stack usage paths, so avoid the
2441 * context switch overhead here.
2442 */
2443int
2439xfs_alloc_vextent( 2444xfs_alloc_vextent(
2440 xfs_alloc_arg_t *args) /* allocation argument structure */ 2445 struct xfs_alloc_arg *args)
2441{ 2446{
2442 DECLARE_COMPLETION_ONSTACK(done); 2447 DECLARE_COMPLETION_ONSTACK(done);
2443 2448
2449 if (!args->userdata)
2450 return __xfs_alloc_vextent(args);
2451
2452
2444 args->done = &done; 2453 args->done = &done;
2445 INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker); 2454 INIT_WORK_ONSTACK(&args->work, xfs_alloc_vextent_worker);
2446 queue_work(xfs_alloc_wq, &args->work); 2455 queue_work(xfs_alloc_wq, &args->work);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index a4beb421018a..269b35c084da 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -989,27 +989,6 @@ xfs_buf_ioerror_alert(
989 (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length); 989 (__uint64_t)XFS_BUF_ADDR(bp), func, bp->b_error, bp->b_length);
990} 990}
991 991
992int
993xfs_bwrite(
994 struct xfs_buf *bp)
995{
996 int error;
997
998 ASSERT(xfs_buf_islocked(bp));
999
1000 bp->b_flags |= XBF_WRITE;
1001 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
1002
1003 xfs_bdstrat_cb(bp);
1004
1005 error = xfs_buf_iowait(bp);
1006 if (error) {
1007 xfs_force_shutdown(bp->b_target->bt_mount,
1008 SHUTDOWN_META_IO_ERROR);
1009 }
1010 return error;
1011}
1012
1013/* 992/*
1014 * Called when we want to stop a buffer from getting written or read. 993 * Called when we want to stop a buffer from getting written or read.
1015 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend 994 * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
@@ -1079,14 +1058,7 @@ xfs_bioerror_relse(
1079 return EIO; 1058 return EIO;
1080} 1059}
1081 1060
1082 1061STATIC int
1083/*
1084 * All xfs metadata buffers except log state machine buffers
1085 * get this attached as their b_bdstrat callback function.
1086 * This is so that we can catch a buffer
1087 * after prematurely unpinning it to forcibly shutdown the filesystem.
1088 */
1089int
1090xfs_bdstrat_cb( 1062xfs_bdstrat_cb(
1091 struct xfs_buf *bp) 1063 struct xfs_buf *bp)
1092{ 1064{
@@ -1107,6 +1079,27 @@ xfs_bdstrat_cb(
1107 return 0; 1079 return 0;
1108} 1080}
1109 1081
1082int
1083xfs_bwrite(
1084 struct xfs_buf *bp)
1085{
1086 int error;
1087
1088 ASSERT(xfs_buf_islocked(bp));
1089
1090 bp->b_flags |= XBF_WRITE;
1091 bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q);
1092
1093 xfs_bdstrat_cb(bp);
1094
1095 error = xfs_buf_iowait(bp);
1096 if (error) {
1097 xfs_force_shutdown(bp->b_target->bt_mount,
1098 SHUTDOWN_META_IO_ERROR);
1099 }
1100 return error;
1101}
1102
1110/* 1103/*
1111 * Wrapper around bdstrat so that we can stop data from going to disk in case 1104 * Wrapper around bdstrat so that we can stop data from going to disk in case
1112 * we are shutting down the filesystem. Typically user data goes thru this 1105 * we are shutting down the filesystem. Typically user data goes thru this
@@ -1243,7 +1236,7 @@ xfs_buf_iorequest(
1243 */ 1236 */
1244 atomic_set(&bp->b_io_remaining, 1); 1237 atomic_set(&bp->b_io_remaining, 1);
1245 _xfs_buf_ioapply(bp); 1238 _xfs_buf_ioapply(bp);
1246 _xfs_buf_ioend(bp, 0); 1239 _xfs_buf_ioend(bp, 1);
1247 1240
1248 xfs_buf_rele(bp); 1241 xfs_buf_rele(bp);
1249} 1242}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7f1d1392ce37..79344c48008e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -180,7 +180,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);
180extern int xfs_bwrite(struct xfs_buf *bp); 180extern int xfs_bwrite(struct xfs_buf *bp);
181 181
182extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); 182extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
183extern int xfs_bdstrat_cb(struct xfs_buf *);
184 183
185extern void xfs_buf_ioend(xfs_buf_t *, int); 184extern void xfs_buf_ioend(xfs_buf_t *, int);
186extern void xfs_buf_ioerror(xfs_buf_t *, int); 185extern void xfs_buf_ioerror(xfs_buf_t *, int);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 45df2b857d48..d9e451115f98 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -954,7 +954,7 @@ xfs_buf_iodone_callbacks(
954 954
955 if (!XFS_BUF_ISSTALE(bp)) { 955 if (!XFS_BUF_ISSTALE(bp)) {
956 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; 956 bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
957 xfs_bdstrat_cb(bp); 957 xfs_buf_iorequest(bp);
958 } else { 958 } else {
959 xfs_buf_relse(bp); 959 xfs_buf_relse(bp);
960 } 960 }
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3a05a41b5d76..1f1535d25a9b 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -208,6 +208,7 @@ xfs_open_by_handle(
208 struct inode *inode; 208 struct inode *inode;
209 struct dentry *dentry; 209 struct dentry *dentry;
210 fmode_t fmode; 210 fmode_t fmode;
211 struct path path;
211 212
212 if (!capable(CAP_SYS_ADMIN)) 213 if (!capable(CAP_SYS_ADMIN))
213 return -XFS_ERROR(EPERM); 214 return -XFS_ERROR(EPERM);
@@ -252,8 +253,10 @@ xfs_open_by_handle(
252 goto out_dput; 253 goto out_dput;
253 } 254 }
254 255
255 filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), 256 path.mnt = parfilp->f_path.mnt;
256 hreq->oflags, cred); 257 path.dentry = dentry;
258 filp = dentry_open(&path, hreq->oflags, cred);
259 dput(dentry);
257 if (IS_ERR(filp)) { 260 if (IS_ERR(filp)) {
258 put_unused_fd(fd); 261 put_unused_fd(fd);
259 return PTR_ERR(filp); 262 return PTR_ERR(filp);
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1a25fd802798..9c4340f5c3e0 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -179,7 +179,7 @@ xfs_vn_create(
179 struct inode *dir, 179 struct inode *dir,
180 struct dentry *dentry, 180 struct dentry *dentry,
181 umode_t mode, 181 umode_t mode,
182 struct nameidata *nd) 182 bool flags)
183{ 183{
184 return xfs_vn_mknod(dir, dentry, mode, 0); 184 return xfs_vn_mknod(dir, dentry, mode, 0);
185} 185}
@@ -197,7 +197,7 @@ STATIC struct dentry *
197xfs_vn_lookup( 197xfs_vn_lookup(
198 struct inode *dir, 198 struct inode *dir,
199 struct dentry *dentry, 199 struct dentry *dentry,
200 struct nameidata *nd) 200 unsigned int flags)
201{ 201{
202 struct xfs_inode *cip; 202 struct xfs_inode *cip;
203 struct xfs_name name; 203 struct xfs_name name;
@@ -222,7 +222,7 @@ STATIC struct dentry *
222xfs_vn_ci_lookup( 222xfs_vn_ci_lookup(
223 struct inode *dir, 223 struct inode *dir,
224 struct dentry *dentry, 224 struct dentry *dentry,
225 struct nameidata *nd) 225 unsigned int flags)
226{ 226{
227 struct xfs_inode *ip; 227 struct xfs_inode *ip;
228 struct xfs_name xname; 228 struct xfs_name xname;