Diffstat (limited to 'fs')
-rw-r--r--  fs/9p/v9fs_vfs.h | 4
-rw-r--r--  fs/9p/vfs_inode.c | 46
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 14
-rw-r--r--  fs/9p/vfs_super.c | 2
-rw-r--r--  fs/Kconfig | 8
-rw-r--r--  fs/Makefile | 3
-rw-r--r--  fs/adfs/super.c | 4
-rw-r--r--  fs/affs/affs.h | 6
-rw-r--r--  fs/affs/amigaffs.c | 6
-rw-r--r--  fs/affs/namei.c | 8
-rw-r--r--  fs/affs/super.c | 1
-rw-r--r--  fs/afs/dir.c | 12
-rw-r--r--  fs/afs/mntpt.c | 4
-rw-r--r--  fs/afs/super.c | 1
-rw-r--r--  fs/attr.c | 4
-rw-r--r--  fs/autofs4/autofs_i.h | 2
-rw-r--r--  fs/autofs4/dev-ioctl.c | 10
-rw-r--r--  fs/autofs4/inode.c | 8
-rw-r--r--  fs/autofs4/root.c | 4
-rw-r--r--  fs/bad_inode.c | 6
-rw-r--r--  fs/befs/linuxvfs.c | 1
-rw-r--r--  fs/bfs/dir.c | 2
-rw-r--r--  fs/bfs/inode.c | 1
-rw-r--r--  fs/binfmt_misc.c | 6
-rw-r--r--  fs/block_dev.c | 38
-rw-r--r--  fs/btrfs/async-thread.c | 122
-rw-r--r--  fs/btrfs/async-thread.h | 4
-rw-r--r--  fs/btrfs/ctree.h | 3
-rw-r--r--  fs/btrfs/delayed-inode.c | 4
-rw-r--r--  fs/btrfs/disk-io.c | 42
-rw-r--r--  fs/btrfs/extent-tree.c | 45
-rw-r--r--  fs/btrfs/file.c | 8
-rw-r--r--  fs/btrfs/free-space-cache.c | 2
-rw-r--r--  fs/btrfs/inode.c | 202
-rw-r--r--  fs/btrfs/ioctl.c | 28
-rw-r--r--  fs/btrfs/relocation.c | 2
-rw-r--r--  fs/btrfs/scrub.c | 8
-rw-r--r--  fs/btrfs/super.c | 37
-rw-r--r--  fs/btrfs/volumes.c | 8
-rw-r--r--  fs/buffer.c | 50
-rw-r--r--  fs/cachefiles/interface.c | 1
-rw-r--r--  fs/ceph/addr.c | 8
-rw-r--r--  fs/ceph/caps.c | 191
-rw-r--r--  fs/ceph/dir.c | 65
-rw-r--r--  fs/ceph/file.c | 23
-rw-r--r--  fs/ceph/inode.c | 54
-rw-r--r--  fs/ceph/ioctl.c | 4
-rw-r--r--  fs/ceph/mds_client.c | 33
-rw-r--r--  fs/ceph/mds_client.h | 2
-rw-r--r--  fs/ceph/snap.c | 16
-rw-r--r--  fs/ceph/super.c | 8
-rw-r--r--  fs/ceph/super.h | 33
-rw-r--r--  fs/ceph/xattr.c | 42
-rw-r--r--  fs/char_dev.c | 6
-rw-r--r--  fs/cifs/cifs_fs_sb.h | 4
-rw-r--r--  fs/cifs/cifsfs.c | 10
-rw-r--r--  fs/cifs/cifsfs.h | 6
-rw-r--r--  fs/cifs/cifsglob.h | 4
-rw-r--r--  fs/cifs/connect.c | 6
-rw-r--r--  fs/cifs/dir.c | 4
-rw-r--r--  fs/cifs/inode.c | 4
-rw-r--r--  fs/coda/dir.c | 8
-rw-r--r--  fs/coda/inode.c | 1
-rw-r--r--  fs/compat.c | 13
-rw-r--r--  fs/compat_ioctl.c | 38
-rw-r--r--  fs/configfs/configfs_internal.h | 4
-rw-r--r--  fs/configfs/dir.c | 6
-rw-r--r--  fs/configfs/inode.c | 8
-rw-r--r--  fs/configfs/mount.c | 36
-rw-r--r--  fs/cramfs/inode.c | 3
-rw-r--r--  fs/dcache.c | 37
-rw-r--r--  fs/debugfs/file.c | 117
-rw-r--r--  fs/debugfs/inode.c | 16
-rw-r--r--  fs/devpts/inode.c | 8
-rw-r--r--  fs/dlm/lowcomms.c | 2
-rw-r--r--  fs/ecryptfs/inode.c | 29
-rw-r--r--  fs/ecryptfs/super.c | 5
-rw-r--r--  fs/efs/super.c | 1
-rw-r--r--  fs/exec.c | 2
-rw-r--r--  fs/exofs/dir.c | 2
-rw-r--r--  fs/exofs/exofs.h | 2
-rw-r--r--  fs/exofs/inode.c | 2
-rw-r--r--  fs/exofs/namei.c | 6
-rw-r--r--  fs/exofs/super.c | 3
-rw-r--r--  fs/ext2/dir.c | 2
-rw-r--r--  fs/ext2/ext2.h | 2
-rw-r--r--  fs/ext2/ialloc.c | 2
-rw-r--r--  fs/ext2/ioctl.c | 12
-rw-r--r--  fs/ext2/namei.c | 6
-rw-r--r--  fs/ext2/super.c | 5
-rw-r--r--  fs/ext3/ialloc.c | 2
-rw-r--r--  fs/ext3/inode.c | 2
-rw-r--r--  fs/ext3/ioctl.c | 20
-rw-r--r--  fs/ext3/namei.c | 8
-rw-r--r--  fs/ext3/super.c | 7
-rw-r--r--  fs/ext4/ext4.h | 2
-rw-r--r--  fs/ext4/extents.c | 3
-rw-r--r--  fs/ext4/ialloc.c | 8
-rw-r--r--  fs/ext4/inode.c | 58
-rw-r--r--  fs/ext4/ioctl.c | 28
-rw-r--r--  fs/ext4/namei.c | 8
-rw-r--r--  fs/ext4/page-io.c | 12
-rw-r--r--  fs/ext4/super.c | 27
-rw-r--r--  fs/fat/fat.h | 6
-rw-r--r--  fs/fat/file.c | 8
-rw-r--r--  fs/fat/inode.c | 33
-rw-r--r--  fs/fat/namei_msdos.c | 4
-rw-r--r--  fs/fat/namei_vfat.c | 4
-rw-r--r--  fs/fhandle.c | 8
-rw-r--r--  fs/file_table.c | 23
-rw-r--r--  fs/filesystems.c | 1
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 5
-rw-r--r--  fs/fs-writeback.c | 21
-rw-r--r--  fs/fuse/dev.c | 3
-rw-r--r--  fs/fuse/dir.c | 12
-rw-r--r--  fs/fuse/file.c | 6
-rw-r--r--  fs/fuse/fuse_i.h | 2
-rw-r--r--  fs/fuse/inode.c | 35
-rw-r--r--  fs/gfs2/acl.c | 14
-rw-r--r--  fs/gfs2/aops.c | 18
-rw-r--r--  fs/gfs2/bmap.c | 26
-rw-r--r--  fs/gfs2/dir.c | 64
-rw-r--r--  fs/gfs2/dir.h | 2
-rw-r--r--  fs/gfs2/export.c | 3
-rw-r--r--  fs/gfs2/file.c | 38
-rw-r--r--  fs/gfs2/incore.h | 20
-rw-r--r--  fs/gfs2/inode.c | 88
-rw-r--r--  fs/gfs2/log.c | 6
-rw-r--r--  fs/gfs2/main.c | 3
-rw-r--r--  fs/gfs2/meta_io.c | 4
-rw-r--r--  fs/gfs2/ops_fstype.c | 2
-rw-r--r--  fs/gfs2/quota.c | 91
-rw-r--r--  fs/gfs2/rgrp.c | 293
-rw-r--r--  fs/gfs2/rgrp.h | 16
-rw-r--r--  fs/gfs2/super.c | 23
-rw-r--r--  fs/gfs2/trans.h | 6
-rw-r--r--  fs/gfs2/xattr.c | 48
-rw-r--r--  fs/hfs/dir.c | 4
-rw-r--r--  fs/hfs/hfs_fs.h | 2
-rw-r--r--  fs/hfs/inode.c | 2
-rw-r--r--  fs/hfs/super.c | 5
-rw-r--r--  fs/hfsplus/dir.c | 6
-rw-r--r--  fs/hfsplus/hfsplus_fs.h | 4
-rw-r--r--  fs/hfsplus/inode.c | 2
-rw-r--r--  fs/hfsplus/ioctl.c | 4
-rw-r--r--  fs/hfsplus/options.c | 4
-rw-r--r--  fs/hfsplus/super.c | 1
-rw-r--r--  fs/hostfs/hostfs.h | 2
-rw-r--r--  fs/hostfs/hostfs_kern.c | 11
-rw-r--r--  fs/hpfs/namei.c | 6
-rw-r--r--  fs/hpfs/super.c | 1
-rw-r--r--  fs/hppfs/hppfs.c | 3
-rw-r--r--  fs/hugetlbfs/inode.c | 66
-rw-r--r--  fs/inode.c | 92
-rw-r--r--  fs/internal.h | 30
-rw-r--r--  fs/ioctl.c | 2
-rw-r--r--  fs/isofs/inode.c | 5
-rw-r--r--  fs/isofs/isofs.h | 6
-rw-r--r--  fs/jbd/checkpoint.c | 2
-rw-r--r--  fs/jbd/journal.c | 2
-rw-r--r--  fs/jbd2/checkpoint.c | 2
-rw-r--r--  fs/jbd2/journal.c | 2
-rw-r--r--  fs/jffs2/dir.c | 14
-rw-r--r--  fs/jffs2/super.c | 5
-rw-r--r--  fs/jfs/ioctl.c | 4
-rw-r--r--  fs/jfs/jfs_logmgr.c | 2
-rw-r--r--  fs/jfs/jfs_txnmgr.c | 4
-rw-r--r--  fs/jfs/namei.c | 6
-rw-r--r--  fs/jfs/super.c | 5
-rw-r--r--  fs/libfs.c | 2
-rw-r--r--  fs/lockd/svcsubs.c | 2
-rw-r--r--  fs/locks.c | 11
-rw-r--r--  fs/logfs/dir.c | 6
-rw-r--r--  fs/logfs/inode.c | 3
-rw-r--r--  fs/logfs/logfs.h | 2
-rw-r--r--  fs/minix/bitmap.c | 2
-rw-r--r--  fs/minix/inode.c | 35
-rw-r--r--  fs/minix/minix.h | 2
-rw-r--r--  fs/minix/namei.c | 6
-rw-r--r--  fs/mount.h | 76
-rw-r--r--  fs/namei.c | 55
-rw-r--r--  fs/namespace.c | 831
-rw-r--r--  fs/ncpfs/dir.c | 18
-rw-r--r--  fs/ncpfs/inode.c | 15
-rw-r--r--  fs/ncpfs/ioctl.c | 2
-rw-r--r--  fs/ncpfs/ncplib_kernel.h | 2
-rw-r--r--  fs/ncpfs/symlink.c | 2
-rw-r--r--  fs/nfs/dir.c | 33
-rw-r--r--  fs/nfs/file.c | 2
-rw-r--r--  fs/nfs/inode.c | 44
-rw-r--r--  fs/nfs/nfs3proc.c | 3
-rw-r--r--  fs/nfs/nfs4proc.c | 29
-rw-r--r--  fs/nfs/nfs4state.c | 33
-rw-r--r--  fs/nfs/proc.c | 3
-rw-r--r--  fs/nfs/super.c | 49
-rw-r--r--  fs/nfsd/nfs4proc.c | 4
-rw-r--r--  fs/nfsd/nfs4recover.c | 12
-rw-r--r--  fs/nfsd/nfs4state.c | 2
-rw-r--r--  fs/nfsd/nfsctl.c | 2
-rw-r--r--  fs/nfsd/nfsfh.c | 4
-rw-r--r--  fs/nfsd/nfsfh.h | 2
-rw-r--r--  fs/nfsd/vfs.c | 38
-rw-r--r--  fs/nfsd/vfs.h | 12
-rw-r--r--  fs/nilfs2/dir.c | 2
-rw-r--r--  fs/nilfs2/inode.c | 2
-rw-r--r--  fs/nilfs2/ioctl.c | 38
-rw-r--r--  fs/nilfs2/namei.c | 6
-rw-r--r--  fs/nilfs2/nilfs.h | 2
-rw-r--r--  fs/nilfs2/segment.c | 2
-rw-r--r--  fs/nilfs2/super.c | 8
-rw-r--r--  fs/notify/fanotify/fanotify_user.c | 6
-rw-r--r--  fs/notify/fsnotify.c | 9
-rw-r--r--  fs/notify/vfsmount_mark.c | 19
-rw-r--r--  fs/ntfs/inode.c | 9
-rw-r--r--  fs/ntfs/inode.h | 2
-rw-r--r--  fs/ntfs/super.c | 6
-rw-r--r--  fs/ntfs/volume.h | 4
-rw-r--r--  fs/ocfs2/cluster/netdebug.c | 2
-rw-r--r--  fs/ocfs2/dlmfs/dlmfs.c | 24
-rw-r--r--  fs/ocfs2/file.c | 2
-rw-r--r--  fs/ocfs2/ioctl.c | 4
-rw-r--r--  fs/ocfs2/move_extents.c | 4
-rw-r--r--  fs/ocfs2/namei.c | 8
-rw-r--r--  fs/ocfs2/super.c | 10
-rw-r--r--  fs/ocfs2/xattr.c | 2
-rw-r--r--  fs/ocfs2/xattr.h | 2
-rw-r--r--  fs/omfs/dir.c | 6
-rw-r--r--  fs/omfs/inode.c | 2
-rw-r--r--  fs/omfs/omfs.h | 2
-rw-r--r--  fs/open.c | 22
-rw-r--r--  fs/openpromfs/inode.c | 1
-rw-r--r--  fs/partitions/Kconfig | 251
-rw-r--r--  fs/partitions/Makefile | 20
-rw-r--r--  fs/partitions/acorn.c | 556
-rw-r--r--  fs/partitions/acorn.h | 14
-rw-r--r--  fs/partitions/amiga.c | 139
-rw-r--r--  fs/partitions/amiga.h | 6
-rw-r--r--  fs/partitions/atari.c | 149
-rw-r--r--  fs/partitions/atari.h | 34
-rw-r--r--  fs/partitions/check.c | 687
-rw-r--r--  fs/partitions/check.h | 49
-rw-r--r--  fs/partitions/efi.c | 675
-rw-r--r--  fs/partitions/efi.h | 134
-rw-r--r--  fs/partitions/ibm.c | 275
-rw-r--r--  fs/partitions/ibm.h | 1
-rw-r--r--  fs/partitions/karma.c | 57
-rw-r--r--  fs/partitions/karma.h | 8
-rw-r--r--  fs/partitions/ldm.c | 1570
-rw-r--r--  fs/partitions/ldm.h | 215
-rw-r--r--  fs/partitions/mac.c | 134
-rw-r--r--  fs/partitions/mac.h | 44
-rw-r--r--  fs/partitions/msdos.c | 552
-rw-r--r--  fs/partitions/msdos.h | 8
-rw-r--r--  fs/partitions/osf.c | 86
-rw-r--r--  fs/partitions/osf.h | 7
-rw-r--r--  fs/partitions/sgi.c | 82
-rw-r--r--  fs/partitions/sgi.h | 8
-rw-r--r--  fs/partitions/sun.c | 122
-rw-r--r--  fs/partitions/sun.h | 8
-rw-r--r--  fs/partitions/sysv68.c | 95
-rw-r--r--  fs/partitions/sysv68.h | 1
-rw-r--r--  fs/partitions/ultrix.c | 48
-rw-r--r--  fs/partitions/ultrix.h | 5
-rw-r--r--  fs/pipe.c | 7
-rw-r--r--  fs/pnode.c | 120
-rw-r--r--  fs/pnode.h | 36
-rw-r--r--  fs/proc/array.c | 8
-rw-r--r--  fs/proc/base.c | 116
-rw-r--r--  fs/proc/generic.c | 8
-rw-r--r--  fs/proc/inode.c | 1
-rw-r--r--  fs/proc/namespaces.c | 1
-rw-r--r--  fs/proc/proc_net.c | 2
-rw-r--r--  fs/proc/root.c | 8
-rw-r--r--  fs/proc/stat.c | 67
-rw-r--r--  fs/proc/uptime.c | 11
-rw-r--r--  fs/proc_namespace.c | 333
-rw-r--r--  fs/pstore/inode.c | 3
-rw-r--r--  fs/pstore/platform.c | 36
-rw-r--r--  fs/qnx4/inode.c | 8
-rw-r--r--  fs/quota/dquot.c | 3
-rw-r--r--  fs/quota/quota.c | 1
-rw-r--r--  fs/ramfs/inode.c | 8
-rw-r--r--  fs/reiserfs/bitmap.c | 91
-rw-r--r--  fs/reiserfs/inode.c | 2
-rw-r--r--  fs/reiserfs/ioctl.c | 8
-rw-r--r--  fs/reiserfs/namei.c | 8
-rw-r--r--  fs/reiserfs/super.c | 119
-rw-r--r--  fs/reiserfs/xattr.c | 2
-rw-r--r--  fs/romfs/super.c | 1
-rw-r--r--  fs/seq_file.c | 10
-rw-r--r--  fs/splice.c | 1
-rw-r--r--  fs/squashfs/super.c | 1
-rw-r--r--  fs/statfs.c | 21
-rw-r--r--  fs/super.c | 70
-rw-r--r--  fs/sync.c | 1
-rw-r--r--  fs/sysfs/file.c | 4
-rw-r--r--  fs/sysfs/group.c | 2
-rw-r--r--  fs/sysfs/inode.c | 2
-rw-r--r--  fs/sysfs/sysfs.h | 4
-rw-r--r--  fs/sysv/ialloc.c | 2
-rw-r--r--  fs/sysv/inode.c | 1
-rw-r--r--  fs/sysv/itree.c | 2
-rw-r--r--  fs/sysv/namei.c | 6
-rw-r--r--  fs/sysv/sysv.h | 2
-rw-r--r--  fs/ubifs/dir.c | 14
-rw-r--r--  fs/ubifs/ioctl.c | 4
-rw-r--r--  fs/ubifs/super.c | 23
-rw-r--r--  fs/ubifs/ubifs.h | 2
-rw-r--r--  fs/udf/ialloc.c | 2
-rw-r--r--  fs/udf/inode.c | 6
-rw-r--r--  fs/udf/namei.c | 6
-rw-r--r--  fs/udf/super.c | 19
-rw-r--r--  fs/udf/udf_sb.h | 8
-rw-r--r--  fs/udf/udfdecl.h | 2
-rw-r--r--  fs/ufs/ialloc.c | 2
-rw-r--r--  fs/ufs/inode.c | 4
-rw-r--r--  fs/ufs/namei.c | 6
-rw-r--r--  fs/ufs/super.c | 5
-rw-r--r--  fs/ufs/ufs.h | 2
-rw-r--r--  fs/xattr.c | 4
-rw-r--r--  fs/xfs/xfs_acl.c | 2
-rw-r--r--  fs/xfs/xfs_buf.c | 10
-rw-r--r--  fs/xfs/xfs_buf.h | 3
-rw-r--r--  fs/xfs/xfs_dquot.c | 500
-rw-r--r--  fs/xfs/xfs_dquot.h | 39
-rw-r--r--  fs/xfs/xfs_dquot_item.c | 5
-rw-r--r--  fs/xfs/xfs_file.c | 6
-rw-r--r--  fs/xfs/xfs_ialloc.c | 4
-rw-r--r--  fs/xfs/xfs_ialloc.h | 2
-rw-r--r--  fs/xfs/xfs_iget.c | 1
-rw-r--r--  fs/xfs/xfs_inode.c | 4
-rw-r--r--  fs/xfs/xfs_inode.h | 2
-rw-r--r--  fs/xfs/xfs_inode_item.c | 2
-rw-r--r--  fs/xfs/xfs_ioctl.c | 8
-rw-r--r--  fs/xfs/xfs_ioctl32.c | 8
-rw-r--r--  fs/xfs/xfs_iops.c | 8
-rw-r--r--  fs/xfs/xfs_log.c | 79
-rw-r--r--  fs/xfs/xfs_log.h | 8
-rw-r--r--  fs/xfs/xfs_log_cil.c | 98
-rw-r--r--  fs/xfs/xfs_mount.h | 1
-rw-r--r--  fs/xfs/xfs_qm.c | 464
-rw-r--r--  fs/xfs/xfs_qm.h | 6
-rw-r--r--  fs/xfs/xfs_quota.h | 12
-rw-r--r--  fs/xfs/xfs_super.c | 70
-rw-r--r--  fs/xfs/xfs_sync.c | 40
-rw-r--r--  fs/xfs/xfs_sync.h | 2
-rw-r--r--  fs/xfs/xfs_trace.h | 2
-rw-r--r--  fs/xfs/xfs_trans.c | 475
-rw-r--r--  fs/xfs/xfs_trans.h | 3
-rw-r--r--  fs/xfs/xfs_utils.c | 2
-rw-r--r--  fs/xfs/xfs_utils.h | 2
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 4
-rw-r--r--  fs/xfs/xfs_vnodeops.h | 4
353 files changed, 3681 insertions(+), 10126 deletions(-)
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 410ffd6ceb5f..dc95a252523d 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -54,9 +54,9 @@ extern struct kmem_cache *v9fs_inode_cache;
54 54
55struct inode *v9fs_alloc_inode(struct super_block *sb); 55struct inode *v9fs_alloc_inode(struct super_block *sb);
56void v9fs_destroy_inode(struct inode *inode); 56void v9fs_destroy_inode(struct inode *inode);
57struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t); 57struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t);
58int v9fs_init_inode(struct v9fs_session_info *v9ses, 58int v9fs_init_inode(struct v9fs_session_info *v9ses,
59 struct inode *inode, int mode, dev_t); 59 struct inode *inode, umode_t mode, dev_t);
60void v9fs_evict_inode(struct inode *inode); 60void v9fs_evict_inode(struct inode *inode);
61ino_t v9fs_qid2ino(struct p9_qid *qid); 61ino_t v9fs_qid2ino(struct p9_qid *qid);
62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 879ed8851737..e0f20de6aa2b 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -59,15 +59,13 @@ static const struct inode_operations v9fs_symlink_inode_operations;
59 * 59 *
60 */ 60 */
61 61
62static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode) 62static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode)
63{ 63{
64 int res; 64 int res;
65 res = mode & 0777; 65 res = mode & 0777;
66 if (S_ISDIR(mode)) 66 if (S_ISDIR(mode))
67 res |= P9_DMDIR; 67 res |= P9_DMDIR;
68 if (v9fs_proto_dotu(v9ses)) { 68 if (v9fs_proto_dotu(v9ses)) {
69 if (S_ISLNK(mode))
70 res |= P9_DMSYMLINK;
71 if (v9ses->nodev == 0) { 69 if (v9ses->nodev == 0) {
72 if (S_ISSOCK(mode)) 70 if (S_ISSOCK(mode))
73 res |= P9_DMSOCKET; 71 res |= P9_DMSOCKET;
@@ -85,10 +83,7 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
85 res |= P9_DMSETGID; 83 res |= P9_DMSETGID;
86 if ((mode & S_ISVTX) == S_ISVTX) 84 if ((mode & S_ISVTX) == S_ISVTX)
87 res |= P9_DMSETVTX; 85 res |= P9_DMSETVTX;
88 if ((mode & P9_DMLINK))
89 res |= P9_DMLINK;
90 } 86 }
91
92 return res; 87 return res;
93} 88}
94 89
@@ -99,11 +94,11 @@ static int unixmode2p9mode(struct v9fs_session_info *v9ses, int mode)
99 * @rdev: major number, minor number in case of device files. 94 * @rdev: major number, minor number in case of device files.
100 * 95 *
101 */ 96 */
102static int p9mode2unixmode(struct v9fs_session_info *v9ses, 97static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses,
103 struct p9_wstat *stat, dev_t *rdev) 98 struct p9_wstat *stat, dev_t *rdev)
104{ 99{
105 int res; 100 int res;
106 int mode = stat->mode; 101 u32 mode = stat->mode;
107 102
108 res = mode & S_IALLUGO; 103 res = mode & S_IALLUGO;
109 *rdev = 0; 104 *rdev = 0;
@@ -251,7 +246,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
251static void v9fs_i_callback(struct rcu_head *head) 246static void v9fs_i_callback(struct rcu_head *head)
252{ 247{
253 struct inode *inode = container_of(head, struct inode, i_rcu); 248 struct inode *inode = container_of(head, struct inode, i_rcu);
254 INIT_LIST_HEAD(&inode->i_dentry);
255 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode)); 249 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
256} 250}
257 251
@@ -261,7 +255,7 @@ void v9fs_destroy_inode(struct inode *inode)
261} 255}
262 256
263int v9fs_init_inode(struct v9fs_session_info *v9ses, 257int v9fs_init_inode(struct v9fs_session_info *v9ses,
264 struct inode *inode, int mode, dev_t rdev) 258 struct inode *inode, umode_t mode, dev_t rdev)
265{ 259{
266 int err = 0; 260 int err = 0;
267 261
@@ -335,7 +329,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
335 329
336 break; 330 break;
337 default: 331 default:
338 P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%x S_IFMT 0x%x\n", 332 P9_DPRINTK(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n",
339 mode, mode & S_IFMT); 333 mode, mode & S_IFMT);
340 err = -EINVAL; 334 err = -EINVAL;
341 goto error; 335 goto error;
@@ -352,13 +346,13 @@ error:
352 * 346 *
353 */ 347 */
354 348
355struct inode *v9fs_get_inode(struct super_block *sb, int mode, dev_t rdev) 349struct inode *v9fs_get_inode(struct super_block *sb, umode_t mode, dev_t rdev)
356{ 350{
357 int err; 351 int err;
358 struct inode *inode; 352 struct inode *inode;
359 struct v9fs_session_info *v9ses = sb->s_fs_info; 353 struct v9fs_session_info *v9ses = sb->s_fs_info;
360 354
361 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); 355 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %ho\n", sb, mode);
362 356
363 inode = new_inode(sb); 357 inode = new_inode(sb);
364 if (!inode) { 358 if (!inode) {
@@ -492,7 +486,8 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
492 int new) 486 int new)
493{ 487{
494 dev_t rdev; 488 dev_t rdev;
495 int retval, umode; 489 int retval;
490 umode_t umode;
496 unsigned long i_ino; 491 unsigned long i_ino;
497 struct inode *inode; 492 struct inode *inode;
498 struct v9fs_session_info *v9ses = sb->s_fs_info; 493 struct v9fs_session_info *v9ses = sb->s_fs_info;
@@ -703,7 +698,7 @@ error:
703 */ 698 */
704 699
705static int 700static int
706v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, 701v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
707 struct nameidata *nd) 702 struct nameidata *nd)
708{ 703{
709 int err; 704 int err;
@@ -786,7 +781,7 @@ error:
786 * 781 *
787 */ 782 */
788 783
789static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 784static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
790{ 785{
791 int err; 786 int err;
792 u32 perm; 787 u32 perm;
@@ -1131,7 +1126,7 @@ void
1131v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, 1126v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
1132 struct super_block *sb) 1127 struct super_block *sb)
1133{ 1128{
1134 mode_t mode; 1129 umode_t mode;
1135 char ext[32]; 1130 char ext[32];
1136 char tag_name[14]; 1131 char tag_name[14];
1137 unsigned int i_nlink; 1132 unsigned int i_nlink;
@@ -1304,9 +1299,8 @@ v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1304 */ 1299 */
1305 1300
1306static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, 1301static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1307 int mode, const char *extension) 1302 u32 perm, const char *extension)
1308{ 1303{
1309 u32 perm;
1310 struct p9_fid *fid; 1304 struct p9_fid *fid;
1311 struct v9fs_session_info *v9ses; 1305 struct v9fs_session_info *v9ses;
1312 1306
@@ -1316,7 +1310,6 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1316 return -EPERM; 1310 return -EPERM;
1317 } 1311 }
1318 1312
1319 perm = unixmode2p9mode(v9ses, mode);
1320 fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm, 1313 fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm,
1321 P9_OREAD); 1314 P9_OREAD);
1322 if (IS_ERR(fid)) 1315 if (IS_ERR(fid))
@@ -1343,7 +1336,7 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
1343 P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, 1336 P9_DPRINTK(P9_DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino,
1344 dentry->d_name.name, symname); 1337 dentry->d_name.name, symname);
1345 1338
1346 return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname); 1339 return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname);
1347} 1340}
1348 1341
1349/** 1342/**
@@ -1398,13 +1391,15 @@ clunk_fid:
1398 */ 1391 */
1399 1392
1400static int 1393static int
1401v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 1394v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
1402{ 1395{
1396 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
1403 int retval; 1397 int retval;
1404 char *name; 1398 char *name;
1399 u32 perm;
1405 1400
1406 P9_DPRINTK(P9_DEBUG_VFS, 1401 P9_DPRINTK(P9_DEBUG_VFS,
1407 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 1402 " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", dir->i_ino,
1408 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev)); 1403 dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
1409 1404
1410 if (!new_valid_dev(rdev)) 1405 if (!new_valid_dev(rdev))
@@ -1427,7 +1422,8 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1427 return -EINVAL; 1422 return -EINVAL;
1428 } 1423 }
1429 1424
1430 retval = v9fs_vfs_mkspecial(dir, dentry, mode, name); 1425 perm = unixmode2p9mode(v9ses, mode);
1426 retval = v9fs_vfs_mkspecial(dir, dentry, perm, name);
1431 __putname(name); 1427 __putname(name);
1432 1428
1433 return retval; 1429 return retval;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0b5745e21946..8ef152ac6a16 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -48,7 +48,7 @@
48#include "acl.h" 48#include "acl.h"
49 49
50static int 50static int
51v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, 51v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
52 dev_t rdev); 52 dev_t rdev);
53 53
54/** 54/**
@@ -253,7 +253,7 @@ int v9fs_open_to_dotl_flags(int flags)
253 */ 253 */
254 254
255static int 255static int
256v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, 256v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
257 struct nameidata *nd) 257 struct nameidata *nd)
258{ 258{
259 int err = 0; 259 int err = 0;
@@ -284,7 +284,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
284 284
285 name = (char *) dentry->d_name.name; 285 name = (char *) dentry->d_name.name;
286 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x " 286 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
287 "mode:0x%x\n", name, flags, omode); 287 "mode:0x%hx\n", name, flags, omode);
288 288
289 dfid = v9fs_fid_lookup(dentry->d_parent); 289 dfid = v9fs_fid_lookup(dentry->d_parent);
290 if (IS_ERR(dfid)) { 290 if (IS_ERR(dfid)) {
@@ -395,7 +395,7 @@ err_clunk_old_fid:
395 */ 395 */
396 396
397static int v9fs_vfs_mkdir_dotl(struct inode *dir, 397static int v9fs_vfs_mkdir_dotl(struct inode *dir,
398 struct dentry *dentry, int omode) 398 struct dentry *dentry, umode_t omode)
399{ 399{
400 int err; 400 int err;
401 struct v9fs_session_info *v9ses; 401 struct v9fs_session_info *v9ses;
@@ -594,7 +594,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
594void 594void
595v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) 595v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
596{ 596{
597 mode_t mode; 597 umode_t mode;
598 struct v9fs_inode *v9inode = V9FS_I(inode); 598 struct v9fs_inode *v9inode = V9FS_I(inode);
599 599
600 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { 600 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
@@ -799,7 +799,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
799 * 799 *
800 */ 800 */
801static int 801static int
802v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, 802v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
803 dev_t rdev) 803 dev_t rdev)
804{ 804{
805 int err; 805 int err;
@@ -814,7 +814,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
814 struct posix_acl *dacl = NULL, *pacl = NULL; 814 struct posix_acl *dacl = NULL, *pacl = NULL;
815 815
816 P9_DPRINTK(P9_DEBUG_VFS, 816 P9_DPRINTK(P9_DEBUG_VFS,
817 " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, 817 " %lu,%s mode: %hx MAJOR: %u MINOR: %u\n", dir->i_ino,
818 dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev)); 818 dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
819 819
820 if (!new_valid_dev(rdev)) 820 if (!new_valid_dev(rdev))
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index c70251d47ed1..f68ff65a32a5 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -117,7 +117,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
117 struct inode *inode = NULL; 117 struct inode *inode = NULL;
118 struct dentry *root = NULL; 118 struct dentry *root = NULL;
119 struct v9fs_session_info *v9ses = NULL; 119 struct v9fs_session_info *v9ses = NULL;
120 int mode = S_IRWXUGO | S_ISVTX; 120 umode_t mode = S_IRWXUGO | S_ISVTX;
121 struct p9_fid *fid; 121 struct p9_fid *fid;
122 int retval = 0; 122 int retval = 0;
123 123
diff --git a/fs/Kconfig b/fs/Kconfig
index 5f4c45d4aa10..30145d886bc2 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -266,14 +266,6 @@ source "fs/9p/Kconfig"
266 266
267endif # NETWORK_FILESYSTEMS 267endif # NETWORK_FILESYSTEMS
268 268
269if BLOCK
270menu "Partition Types"
271
272source "fs/partitions/Kconfig"
273
274endmenu
275endif
276
277source "fs/nls/Kconfig" 269source "fs/nls/Kconfig"
278source "fs/dlm/Kconfig" 270source "fs/dlm/Kconfig"
279 271
diff --git a/fs/Makefile b/fs/Makefile
index d2c3353d5477..93804d4d66e1 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,8 @@ else
19obj-y += no-block.o 19obj-y += no-block.o
20endif 20endif
21 21
22obj-$(CONFIG_PROC_FS) += proc_namespace.o
23
22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o 24obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
23obj-y += notify/ 25obj-y += notify/
24obj-$(CONFIG_EPOLL) += eventpoll.o 26obj-$(CONFIG_EPOLL) += eventpoll.o
@@ -52,7 +54,6 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
52obj-y += quota/ 54obj-y += quota/
53 55
54obj-$(CONFIG_PROC_FS) += proc/ 56obj-$(CONFIG_PROC_FS) += proc/
55obj-y += partitions/
56obj-$(CONFIG_SYSFS) += sysfs/ 57obj-$(CONFIG_SYSFS) += sysfs/
57obj-$(CONFIG_CONFIGFS_FS) += configfs/ 58obj-$(CONFIG_CONFIGFS_FS) += configfs/
58obj-y += devpts/ 59obj-y += devpts/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index c8bf36a1996a..8e3b36ace305 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -126,9 +126,9 @@ static void adfs_put_super(struct super_block *sb)
126 sb->s_fs_info = NULL; 126 sb->s_fs_info = NULL;
127} 127}
128 128
129static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) 129static int adfs_show_options(struct seq_file *seq, struct dentry *root)
130{ 130{
131 struct adfs_sb_info *asb = ADFS_SB(mnt->mnt_sb); 131 struct adfs_sb_info *asb = ADFS_SB(root->d_sb);
132 132
133 if (asb->s_uid != 0) 133 if (asb->s_uid != 0)
134 seq_printf(seq, ",uid=%u", asb->s_uid); 134 seq_printf(seq, ",uid=%u", asb->s_uid);
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index c2b9c79eb64e..45a0ce45d7b4 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -136,7 +136,7 @@ extern int affs_remove_header(struct dentry *dentry);
136extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh); 136extern u32 affs_checksum_block(struct super_block *sb, struct buffer_head *bh);
137extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh); 137extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh);
138extern void secs_to_datestamp(time_t secs, struct affs_date *ds); 138extern void secs_to_datestamp(time_t secs, struct affs_date *ds);
139extern mode_t prot_to_mode(u32 prot); 139extern umode_t prot_to_mode(u32 prot);
140extern void mode_to_prot(struct inode *inode); 140extern void mode_to_prot(struct inode *inode);
141extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...); 141extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...);
142extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...); 142extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...);
@@ -156,8 +156,8 @@ extern void affs_free_bitmap(struct super_block *sb);
156extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); 156extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
157extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); 157extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *);
158extern int affs_unlink(struct inode *dir, struct dentry *dentry); 158extern int affs_unlink(struct inode *dir, struct dentry *dentry);
159extern int affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *); 159extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *);
160extern int affs_mkdir(struct inode *dir, struct dentry *dentry, int mode); 160extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
161extern int affs_rmdir(struct inode *dir, struct dentry *dentry); 161extern int affs_rmdir(struct inode *dir, struct dentry *dentry);
162extern int affs_link(struct dentry *olddentry, struct inode *dir, 162extern int affs_link(struct dentry *olddentry, struct inode *dir,
163 struct dentry *dentry); 163 struct dentry *dentry);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index de37ec842340..52a6407682e6 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -390,10 +390,10 @@ secs_to_datestamp(time_t secs, struct affs_date *ds)
390 ds->ticks = cpu_to_be32(secs * 50); 390 ds->ticks = cpu_to_be32(secs * 50);
391} 391}
392 392
393mode_t 393umode_t
394prot_to_mode(u32 prot) 394prot_to_mode(u32 prot)
395{ 395{
396 int mode = 0; 396 umode_t mode = 0;
397 397
398 if (!(prot & FIBF_NOWRITE)) 398 if (!(prot & FIBF_NOWRITE))
399 mode |= S_IWUSR; 399 mode |= S_IWUSR;
@@ -421,7 +421,7 @@ void
421mode_to_prot(struct inode *inode) 421mode_to_prot(struct inode *inode)
422{ 422{
423 u32 prot = AFFS_I(inode)->i_protect; 423 u32 prot = AFFS_I(inode)->i_protect;
424 mode_t mode = inode->i_mode; 424 umode_t mode = inode->i_mode;
425 425
426 if (!(mode & S_IXUSR)) 426 if (!(mode & S_IXUSR))
427 prot |= FIBF_NOEXECUTE; 427 prot |= FIBF_NOEXECUTE;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 780a11dc6318..47806940aac0 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -255,13 +255,13 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
255} 255}
256 256
257int 257int
258affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 258affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
259{ 259{
260 struct super_block *sb = dir->i_sb; 260 struct super_block *sb = dir->i_sb;
261 struct inode *inode; 261 struct inode *inode;
262 int error; 262 int error;
263 263
264 pr_debug("AFFS: create(%lu,\"%.*s\",0%o)\n",dir->i_ino,(int)dentry->d_name.len, 264 pr_debug("AFFS: create(%lu,\"%.*s\",0%ho)\n",dir->i_ino,(int)dentry->d_name.len,
265 dentry->d_name.name,mode); 265 dentry->d_name.name,mode);
266 266
267 inode = affs_new_inode(dir); 267 inode = affs_new_inode(dir);
@@ -285,12 +285,12 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
285} 285}
286 286
287int 287int
288affs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 288affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
289{ 289{
290 struct inode *inode; 290 struct inode *inode;
291 int error; 291 int error;
292 292
293 pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%o)\n",dir->i_ino, 293 pr_debug("AFFS: mkdir(%lu,\"%.*s\",0%ho)\n",dir->i_ino,
294 (int)dentry->d_name.len,dentry->d_name.name,mode); 294 (int)dentry->d_name.len,dentry->d_name.name,mode);
295 295
296 inode = affs_new_inode(dir); 296 inode = affs_new_inode(dir);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index b31507d0f9b9..8ba73fed7964 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -98,7 +98,6 @@ static struct inode *affs_alloc_inode(struct super_block *sb)
98static void affs_i_callback(struct rcu_head *head) 98static void affs_i_callback(struct rcu_head *head)
99{ 99{
100 struct inode *inode = container_of(head, struct inode, i_rcu); 100 struct inode *inode = container_of(head, struct inode, i_rcu);
101 INIT_LIST_HEAD(&inode->i_dentry);
102 kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); 101 kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
103} 102}
104 103
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1b0b19550015..e22dc4b4a503 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -28,9 +28,9 @@ static int afs_d_delete(const struct dentry *dentry);
28static void afs_d_release(struct dentry *dentry); 28static void afs_d_release(struct dentry *dentry);
29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, 29static int afs_lookup_filldir(void *_cookie, const char *name, int nlen,
30 loff_t fpos, u64 ino, unsigned dtype); 30 loff_t fpos, u64 ino, unsigned dtype);
31static int afs_create(struct inode *dir, struct dentry *dentry, int mode, 31static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
32 struct nameidata *nd); 32 struct nameidata *nd);
33static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode); 33static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
34static int afs_rmdir(struct inode *dir, struct dentry *dentry); 34static int afs_rmdir(struct inode *dir, struct dentry *dentry);
35static int afs_unlink(struct inode *dir, struct dentry *dentry); 35static int afs_unlink(struct inode *dir, struct dentry *dentry);
36static int afs_link(struct dentry *from, struct inode *dir, 36static int afs_link(struct dentry *from, struct inode *dir,
@@ -764,7 +764,7 @@ static void afs_d_release(struct dentry *dentry)
764/* 764/*
765 * create a directory on an AFS filesystem 765 * create a directory on an AFS filesystem
766 */ 766 */
767static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 767static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
768{ 768{
769 struct afs_file_status status; 769 struct afs_file_status status;
770 struct afs_callback cb; 770 struct afs_callback cb;
@@ -777,7 +777,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
777 777
778 dvnode = AFS_FS_I(dir); 778 dvnode = AFS_FS_I(dir);
779 779
780 _enter("{%x:%u},{%s},%o", 780 _enter("{%x:%u},{%s},%ho",
781 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); 781 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
782 782
783 ret = -ENAMETOOLONG; 783 ret = -ENAMETOOLONG;
@@ -948,7 +948,7 @@ error:
948/* 948/*
949 * create a regular file on an AFS filesystem 949 * create a regular file on an AFS filesystem
950 */ 950 */
951static int afs_create(struct inode *dir, struct dentry *dentry, int mode, 951static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
952 struct nameidata *nd) 952 struct nameidata *nd)
953{ 953{
954 struct afs_file_status status; 954 struct afs_file_status status;
@@ -962,7 +962,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
962 962
963 dvnode = AFS_FS_I(dir); 963 dvnode = AFS_FS_I(dir);
964 964
965 _enter("{%x:%u},{%s},%o,", 965 _enter("{%x:%u},{%s},%ho,",
966 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode); 966 dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
967 967
968 ret = -ENAMETOOLONG; 968 ret = -ENAMETOOLONG;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index aa59184151d0..8f4ce2658b7d 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -242,7 +242,7 @@ struct vfsmount *afs_d_automount(struct path *path)
242{ 242{
243 struct vfsmount *newmnt; 243 struct vfsmount *newmnt;
244 244
245 _enter("{%s,%s}", path->mnt->mnt_devname, path->dentry->d_name.name); 245 _enter("{%s}", path->dentry->d_name.name);
246 246
247 newmnt = afs_mntpt_do_automount(path->dentry); 247 newmnt = afs_mntpt_do_automount(path->dentry);
248 if (IS_ERR(newmnt)) 248 if (IS_ERR(newmnt))
@@ -252,7 +252,7 @@ struct vfsmount *afs_d_automount(struct path *path)
252 mnt_set_expiry(newmnt, &afs_vfsmounts); 252 mnt_set_expiry(newmnt, &afs_vfsmounts);
253 queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer, 253 queue_delayed_work(afs_wq, &afs_mntpt_expiry_timer,
254 afs_mntpt_expiry_timeout * HZ); 254 afs_mntpt_expiry_timeout * HZ);
255 _leave(" = %p {%s}", newmnt, newmnt->mnt_devname); 255 _leave(" = %p", newmnt);
256 return newmnt; 256 return newmnt;
257} 257}
258 258
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 356dcf0929e8..983ec59fc80d 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -495,7 +495,6 @@ static void afs_i_callback(struct rcu_head *head)
495{ 495{
496 struct inode *inode = container_of(head, struct inode, i_rcu); 496 struct inode *inode = container_of(head, struct inode, i_rcu);
497 struct afs_vnode *vnode = AFS_FS_I(inode); 497 struct afs_vnode *vnode = AFS_FS_I(inode);
498 INIT_LIST_HEAD(&inode->i_dentry);
499 kmem_cache_free(afs_inode_cachep, vnode); 498 kmem_cache_free(afs_inode_cachep, vnode);
500} 499}
501 500
diff --git a/fs/attr.c b/fs/attr.c
index 7ee7ba488313..95053ad8abcc 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -166,7 +166,7 @@ EXPORT_SYMBOL(setattr_copy);
166int notify_change(struct dentry * dentry, struct iattr * attr) 166int notify_change(struct dentry * dentry, struct iattr * attr)
167{ 167{
168 struct inode *inode = dentry->d_inode; 168 struct inode *inode = dentry->d_inode;
169 mode_t mode = inode->i_mode; 169 umode_t mode = inode->i_mode;
170 int error; 170 int error;
171 struct timespec now; 171 struct timespec now;
172 unsigned int ia_valid = attr->ia_valid; 172 unsigned int ia_valid = attr->ia_valid;
@@ -177,7 +177,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
177 } 177 }
178 178
179 if ((ia_valid & ATTR_MODE)) { 179 if ((ia_valid & ATTR_MODE)) {
180 mode_t amode = attr->ia_mode; 180 umode_t amode = attr->ia_mode;
181 /* Flag setting protected by i_mutex */ 181 /* Flag setting protected by i_mutex */
182 if (is_sxid(amode)) 182 if (is_sxid(amode))
183 inode->i_flags &= ~S_NOSEC; 183 inode->i_flags &= ~S_NOSEC;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 326dc08d3e3f..5869d4e974a9 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -155,7 +155,7 @@ static inline int autofs4_ispending(struct dentry *dentry)
155 return 0; 155 return 0;
156} 156}
157 157
158struct inode *autofs4_get_inode(struct super_block *, mode_t); 158struct inode *autofs4_get_inode(struct super_block *, umode_t);
159void autofs4_free_ino(struct autofs_info *); 159void autofs4_free_ino(struct autofs_info *);
160 160
161/* Expiration */ 161/* Expiration */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 509fe1eb66ae..76741d8d7786 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -194,7 +194,7 @@ static int find_autofs_mount(const char *pathname,
194 return err; 194 return err;
195 err = -ENOENT; 195 err = -ENOENT;
196 while (path.dentry == path.mnt->mnt_root) { 196 while (path.dentry == path.mnt->mnt_root) {
197 if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) { 197 if (path.dentry->d_sb->s_magic == AUTOFS_SUPER_MAGIC) {
198 if (test(&path, data)) { 198 if (test(&path, data)) {
199 path_get(&path); 199 path_get(&path);
200 if (!err) /* already found some */ 200 if (!err) /* already found some */
@@ -212,7 +212,7 @@ static int find_autofs_mount(const char *pathname,
212 212
213static int test_by_dev(struct path *path, void *p) 213static int test_by_dev(struct path *path, void *p)
214{ 214{
215 return path->mnt->mnt_sb->s_dev == *(dev_t *)p; 215 return path->dentry->d_sb->s_dev == *(dev_t *)p;
216} 216}
217 217
218static int test_by_type(struct path *path, void *p) 218static int test_by_type(struct path *path, void *p)
@@ -538,11 +538,11 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
538 err = find_autofs_mount(name, &path, test_by_type, &type); 538 err = find_autofs_mount(name, &path, test_by_type, &type);
539 if (err) 539 if (err)
540 goto out; 540 goto out;
541 devid = new_encode_dev(path.mnt->mnt_sb->s_dev); 541 devid = new_encode_dev(path.dentry->d_sb->s_dev);
542 err = 0; 542 err = 0;
543 if (path.mnt->mnt_root == path.dentry) { 543 if (path.mnt->mnt_root == path.dentry) {
544 err = 1; 544 err = 1;
545 magic = path.mnt->mnt_sb->s_magic; 545 magic = path.dentry->d_sb->s_magic;
546 } 546 }
547 } else { 547 } else {
548 dev_t dev = sbi->sb->s_dev; 548 dev_t dev = sbi->sb->s_dev;
@@ -556,7 +556,7 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
556 err = have_submounts(path.dentry); 556 err = have_submounts(path.dentry);
557 557
558 if (follow_down_one(&path)) 558 if (follow_down_one(&path))
559 magic = path.mnt->mnt_sb->s_magic; 559 magic = path.dentry->d_sb->s_magic;
560 } 560 }
561 561
562 param->ismountpoint.out.devid = devid; 562 param->ismountpoint.out.devid = devid;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 8179f1ab8175..2ba44c79d548 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -70,10 +70,10 @@ out_kill_sb:
70 kill_litter_super(sb); 70 kill_litter_super(sb);
71} 71}
72 72
73static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt) 73static int autofs4_show_options(struct seq_file *m, struct dentry *root)
74{ 74{
75 struct autofs_sb_info *sbi = autofs4_sbi(mnt->mnt_sb); 75 struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
76 struct inode *root_inode = mnt->mnt_sb->s_root->d_inode; 76 struct inode *root_inode = root->d_sb->s_root->d_inode;
77 77
78 if (!sbi) 78 if (!sbi)
79 return 0; 79 return 0;
@@ -326,7 +326,7 @@ fail_unlock:
326 return -EINVAL; 326 return -EINVAL;
327} 327}
328 328
329struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode) 329struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
330{ 330{
331 struct inode *inode = new_inode(sb); 331 struct inode *inode = new_inode(sb);
332 332
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index f55ae23b137e..75e5f1c8e028 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -26,7 +26,7 @@
26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *); 26static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
27static int autofs4_dir_unlink(struct inode *,struct dentry *); 27static int autofs4_dir_unlink(struct inode *,struct dentry *);
28static int autofs4_dir_rmdir(struct inode *,struct dentry *); 28static int autofs4_dir_rmdir(struct inode *,struct dentry *);
29static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 29static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t);
30static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); 30static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
31#ifdef CONFIG_COMPAT 31#ifdef CONFIG_COMPAT
32static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); 32static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
@@ -699,7 +699,7 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
699 return 0; 699 return 0;
700} 700}
701 701
702static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode) 702static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
703{ 703{
704 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb); 704 struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
705 struct autofs_info *ino = autofs4_dentry_ino(dentry); 705 struct autofs_info *ino = autofs4_dentry_ino(dentry);
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 9205cf25f1c6..22e9a78872ff 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops =
173}; 173};
174 174
175static int bad_inode_create (struct inode *dir, struct dentry *dentry, 175static int bad_inode_create (struct inode *dir, struct dentry *dentry,
176 int mode, struct nameidata *nd) 176 umode_t mode, struct nameidata *nd)
177{ 177{
178 return -EIO; 178 return -EIO;
179} 179}
@@ -202,7 +202,7 @@ static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
202} 202}
203 203
204static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry, 204static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
205 int mode) 205 umode_t mode)
206{ 206{
207 return -EIO; 207 return -EIO;
208} 208}
@@ -213,7 +213,7 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
213} 213}
214 214
215static int bad_inode_mknod (struct inode *dir, struct dentry *dentry, 215static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
216 int mode, dev_t rdev) 216 umode_t mode, dev_t rdev)
217{ 217{
218 return -EIO; 218 return -EIO;
219} 219}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 8342ca67abcd..6e6d536767fe 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -286,7 +286,6 @@ befs_alloc_inode(struct super_block *sb)
286static void befs_i_callback(struct rcu_head *head) 286static void befs_i_callback(struct rcu_head *head)
287{ 287{
288 struct inode *inode = container_of(head, struct inode, i_rcu); 288 struct inode *inode = container_of(head, struct inode, i_rcu);
289 INIT_LIST_HEAD(&inode->i_dentry);
290 kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); 289 kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
291} 290}
292 291
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 9cc074019479..d12c7966db27 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -84,7 +84,7 @@ const struct file_operations bfs_dir_operations = {
84 84
85extern void dump_imap(const char *, struct super_block *); 85extern void dump_imap(const char *, struct super_block *);
86 86
87static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, 87static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
88 struct nameidata *nd) 88 struct nameidata *nd)
89{ 89{
90 int err; 90 int err;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 697af5bf70b3..b0391bc402b1 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -251,7 +251,6 @@ static struct inode *bfs_alloc_inode(struct super_block *sb)
251static void bfs_i_callback(struct rcu_head *head) 251static void bfs_i_callback(struct rcu_head *head)
252{ 252{
253 struct inode *inode = container_of(head, struct inode, i_rcu); 253 struct inode *inode = container_of(head, struct inode, i_rcu);
254 INIT_LIST_HEAD(&inode->i_dentry);
255 kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); 254 kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
256} 255}
257 256
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1e9edbdeda7e..a9198dfd5f85 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -560,7 +560,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
560 break; 560 break;
561 case 2: set_bit(Enabled, &e->flags); 561 case 2: set_bit(Enabled, &e->flags);
562 break; 562 break;
563 case 3: root = dget(file->f_path.mnt->mnt_sb->s_root); 563 case 3: root = dget(file->f_path.dentry->d_sb->s_root);
564 mutex_lock(&root->d_inode->i_mutex); 564 mutex_lock(&root->d_inode->i_mutex);
565 565
566 kill_node(e); 566 kill_node(e);
@@ -587,7 +587,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
587 Node *e; 587 Node *e;
588 struct inode *inode; 588 struct inode *inode;
589 struct dentry *root, *dentry; 589 struct dentry *root, *dentry;
590 struct super_block *sb = file->f_path.mnt->mnt_sb; 590 struct super_block *sb = file->f_path.dentry->d_sb;
591 int err = 0; 591 int err = 0;
592 592
593 e = create_entry(buffer, count); 593 e = create_entry(buffer, count);
@@ -666,7 +666,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
666 switch (res) { 666 switch (res) {
667 case 1: enabled = 0; break; 667 case 1: enabled = 0; break;
668 case 2: enabled = 1; break; 668 case 2: enabled = 1; break;
669 case 3: root = dget(file->f_path.mnt->mnt_sb->s_root); 669 case 3: root = dget(file->f_path.dentry->d_sb->s_root);
670 mutex_lock(&root->d_inode->i_mutex); 670 mutex_lock(&root->d_inode->i_mutex);
671 671
672 while (!list_empty(&entries)) 672 while (!list_empty(&entries))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b07f1da1de4e..69a5b6fbee2b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -17,6 +17,7 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/blkpg.h> 18#include <linux/blkpg.h>
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20#include <linux/swap.h>
20#include <linux/pagevec.h> 21#include <linux/pagevec.h>
21#include <linux/writeback.h> 22#include <linux/writeback.h>
22#include <linux/mpage.h> 23#include <linux/mpage.h>
@@ -25,6 +26,7 @@
25#include <linux/namei.h> 26#include <linux/namei.h>
26#include <linux/log2.h> 27#include <linux/log2.h>
27#include <linux/kmemleak.h> 28#include <linux/kmemleak.h>
29#include <linux/cleancache.h>
28#include <asm/uaccess.h> 30#include <asm/uaccess.h>
29#include "internal.h" 31#include "internal.h"
30 32
@@ -82,13 +84,35 @@ static sector_t max_block(struct block_device *bdev)
82} 84}
83 85
84/* Kill _all_ buffers and pagecache , dirty or not.. */ 86/* Kill _all_ buffers and pagecache , dirty or not.. */
85static void kill_bdev(struct block_device *bdev) 87void kill_bdev(struct block_device *bdev)
86{ 88{
87 if (bdev->bd_inode->i_mapping->nrpages == 0) 89 struct address_space *mapping = bdev->bd_inode->i_mapping;
90
91 if (mapping->nrpages == 0)
88 return; 92 return;
93
89 invalidate_bh_lrus(); 94 invalidate_bh_lrus();
90 truncate_inode_pages(bdev->bd_inode->i_mapping, 0); 95 truncate_inode_pages(mapping, 0);
91} 96}
97EXPORT_SYMBOL(kill_bdev);
98
99/* Invalidate clean unused buffers and pagecache. */
100void invalidate_bdev(struct block_device *bdev)
101{
102 struct address_space *mapping = bdev->bd_inode->i_mapping;
103
104 if (mapping->nrpages == 0)
105 return;
106
107 invalidate_bh_lrus();
108 lru_add_drain_all(); /* make sure all lru add caches are flushed */
109 invalidate_mapping_pages(mapping, 0, -1);
110 /* 99% of the time, we don't need to flush the cleancache on the bdev.
111 * But, for the strange corners, lets be cautious
112 */
113 cleancache_flush_inode(mapping);
114}
115EXPORT_SYMBOL(invalidate_bdev);
92 116
93int set_blocksize(struct block_device *bdev, int size) 117int set_blocksize(struct block_device *bdev, int size)
94{ 118{
@@ -425,7 +449,6 @@ static void bdev_i_callback(struct rcu_head *head)
425 struct inode *inode = container_of(head, struct inode, i_rcu); 449 struct inode *inode = container_of(head, struct inode, i_rcu);
426 struct bdev_inode *bdi = BDEV_I(inode); 450 struct bdev_inode *bdi = BDEV_I(inode);
427 451
428 INIT_LIST_HEAD(&inode->i_dentry);
429 kmem_cache_free(bdev_cachep, bdi); 452 kmem_cache_free(bdev_cachep, bdi);
430} 453}
431 454
@@ -493,7 +516,7 @@ static struct file_system_type bd_type = {
493 .kill_sb = kill_anon_super, 516 .kill_sb = kill_anon_super,
494}; 517};
495 518
496struct super_block *blockdev_superblock __read_mostly; 519static struct super_block *blockdev_superblock __read_mostly;
497 520
498void __init bdev_cache_init(void) 521void __init bdev_cache_init(void)
499{ 522{
@@ -639,6 +662,11 @@ static struct block_device *bd_acquire(struct inode *inode)
639 return bdev; 662 return bdev;
640} 663}
641 664
665static inline int sb_is_blkdev_sb(struct super_block *sb)
666{
667 return sb == blockdev_superblock;
668}
669
642/* Call when you free inode */ 670/* Call when you free inode */
643 671
644void bd_forget(struct inode *inode) 672void bd_forget(struct inode *inode)
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 7ec14097fef1..0cc20b35c1c4 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -64,6 +64,8 @@ struct btrfs_worker_thread {
64 int idle; 64 int idle;
65}; 65};
66 66
67static int __btrfs_start_workers(struct btrfs_workers *workers);
68
67/* 69/*
68 * btrfs_start_workers uses kthread_run, which can block waiting for memory 70 * btrfs_start_workers uses kthread_run, which can block waiting for memory
69 * for a very long time. It will actually throttle on page writeback, 71 * for a very long time. It will actually throttle on page writeback,
@@ -88,27 +90,10 @@ static void start_new_worker_func(struct btrfs_work *work)
88{ 90{
89 struct worker_start *start; 91 struct worker_start *start;
90 start = container_of(work, struct worker_start, work); 92 start = container_of(work, struct worker_start, work);
91 btrfs_start_workers(start->queue, 1); 93 __btrfs_start_workers(start->queue);
92 kfree(start); 94 kfree(start);
93} 95}
94 96
95static int start_new_worker(struct btrfs_workers *queue)
96{
97 struct worker_start *start;
98 int ret;
99
100 start = kzalloc(sizeof(*start), GFP_NOFS);
101 if (!start)
102 return -ENOMEM;
103
104 start->work.func = start_new_worker_func;
105 start->queue = queue;
106 ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
107 if (ret)
108 kfree(start);
109 return ret;
110}
111
112/* 97/*
113 * helper function to move a thread onto the idle list after it 98 * helper function to move a thread onto the idle list after it
114 * has finished some requests. 99 * has finished some requests.
@@ -153,12 +138,20 @@ static void check_busy_worker(struct btrfs_worker_thread *worker)
153static void check_pending_worker_creates(struct btrfs_worker_thread *worker) 138static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
154{ 139{
155 struct btrfs_workers *workers = worker->workers; 140 struct btrfs_workers *workers = worker->workers;
141 struct worker_start *start;
156 unsigned long flags; 142 unsigned long flags;
157 143
158 rmb(); 144 rmb();
159 if (!workers->atomic_start_pending) 145 if (!workers->atomic_start_pending)
160 return; 146 return;
161 147
148 start = kzalloc(sizeof(*start), GFP_NOFS);
149 if (!start)
150 return;
151
152 start->work.func = start_new_worker_func;
153 start->queue = workers;
154
162 spin_lock_irqsave(&workers->lock, flags); 155 spin_lock_irqsave(&workers->lock, flags);
163 if (!workers->atomic_start_pending) 156 if (!workers->atomic_start_pending)
164 goto out; 157 goto out;
@@ -170,10 +163,11 @@ static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
170 163
171 workers->num_workers_starting += 1; 164 workers->num_workers_starting += 1;
172 spin_unlock_irqrestore(&workers->lock, flags); 165 spin_unlock_irqrestore(&workers->lock, flags);
173 start_new_worker(workers); 166 btrfs_queue_worker(workers->atomic_worker_start, &start->work);
174 return; 167 return;
175 168
176out: 169out:
170 kfree(start);
177 spin_unlock_irqrestore(&workers->lock, flags); 171 spin_unlock_irqrestore(&workers->lock, flags);
178} 172}
179 173
@@ -331,7 +325,7 @@ again:
331 run_ordered_completions(worker->workers, work); 325 run_ordered_completions(worker->workers, work);
332 326
333 check_pending_worker_creates(worker); 327 check_pending_worker_creates(worker);
334 328 cond_resched();
335 } 329 }
336 330
337 spin_lock_irq(&worker->lock); 331 spin_lock_irq(&worker->lock);
@@ -340,7 +334,7 @@ again:
340 if (freezing(current)) { 334 if (freezing(current)) {
341 worker->working = 0; 335 worker->working = 0;
342 spin_unlock_irq(&worker->lock); 336 spin_unlock_irq(&worker->lock);
343 refrigerator(); 337 try_to_freeze();
344 } else { 338 } else {
345 spin_unlock_irq(&worker->lock); 339 spin_unlock_irq(&worker->lock);
346 if (!kthread_should_stop()) { 340 if (!kthread_should_stop()) {
@@ -462,56 +456,55 @@ void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
462 * starts new worker threads. This does not enforce the max worker 456 * starts new worker threads. This does not enforce the max worker
463 * count in case you need to temporarily go past it. 457 * count in case you need to temporarily go past it.
464 */ 458 */
465static int __btrfs_start_workers(struct btrfs_workers *workers, 459static int __btrfs_start_workers(struct btrfs_workers *workers)
466 int num_workers)
467{ 460{
468 struct btrfs_worker_thread *worker; 461 struct btrfs_worker_thread *worker;
469 int ret = 0; 462 int ret = 0;
470 int i;
471 463
472 for (i = 0; i < num_workers; i++) { 464 worker = kzalloc(sizeof(*worker), GFP_NOFS);
473 worker = kzalloc(sizeof(*worker), GFP_NOFS); 465 if (!worker) {
474 if (!worker) { 466 ret = -ENOMEM;
475 ret = -ENOMEM; 467 goto fail;
476 goto fail; 468 }
477 }
478 469
479 INIT_LIST_HEAD(&worker->pending); 470 INIT_LIST_HEAD(&worker->pending);
480 INIT_LIST_HEAD(&worker->prio_pending); 471 INIT_LIST_HEAD(&worker->prio_pending);
481 INIT_LIST_HEAD(&worker->worker_list); 472 INIT_LIST_HEAD(&worker->worker_list);
482 spin_lock_init(&worker->lock); 473 spin_lock_init(&worker->lock);
483 474
484 atomic_set(&worker->num_pending, 0); 475 atomic_set(&worker->num_pending, 0);
485 atomic_set(&worker->refs, 1); 476 atomic_set(&worker->refs, 1);
486 worker->workers = workers; 477 worker->workers = workers;
487 worker->task = kthread_run(worker_loop, worker, 478 worker->task = kthread_run(worker_loop, worker,
488 "btrfs-%s-%d", workers->name, 479 "btrfs-%s-%d", workers->name,
489 workers->num_workers + i); 480 workers->num_workers + 1);
490 if (IS_ERR(worker->task)) { 481 if (IS_ERR(worker->task)) {
491 ret = PTR_ERR(worker->task); 482 ret = PTR_ERR(worker->task);
492 kfree(worker); 483 kfree(worker);
493 goto fail; 484 goto fail;
494 }
495 spin_lock_irq(&workers->lock);
496 list_add_tail(&worker->worker_list, &workers->idle_list);
497 worker->idle = 1;
498 workers->num_workers++;
499 workers->num_workers_starting--;
500 WARN_ON(workers->num_workers_starting < 0);
501 spin_unlock_irq(&workers->lock);
502 } 485 }
486 spin_lock_irq(&workers->lock);
487 list_add_tail(&worker->worker_list, &workers->idle_list);
488 worker->idle = 1;
489 workers->num_workers++;
490 workers->num_workers_starting--;
491 WARN_ON(workers->num_workers_starting < 0);
492 spin_unlock_irq(&workers->lock);
493
503 return 0; 494 return 0;
504fail: 495fail:
505 btrfs_stop_workers(workers); 496 spin_lock_irq(&workers->lock);
497 workers->num_workers_starting--;
498 spin_unlock_irq(&workers->lock);
506 return ret; 499 return ret;
507} 500}
508 501
509int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) 502int btrfs_start_workers(struct btrfs_workers *workers)
510{ 503{
511 spin_lock_irq(&workers->lock); 504 spin_lock_irq(&workers->lock);
512 workers->num_workers_starting += num_workers; 505 workers->num_workers_starting++;
513 spin_unlock_irq(&workers->lock); 506 spin_unlock_irq(&workers->lock);
514 return __btrfs_start_workers(workers, num_workers); 507 return __btrfs_start_workers(workers);
515} 508}
516 509
517/* 510/*
@@ -568,9 +561,10 @@ static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
568 struct btrfs_worker_thread *worker; 561 struct btrfs_worker_thread *worker;
569 unsigned long flags; 562 unsigned long flags;
570 struct list_head *fallback; 563 struct list_head *fallback;
564 int ret;
571 565
572again:
573 spin_lock_irqsave(&workers->lock, flags); 566 spin_lock_irqsave(&workers->lock, flags);
567again:
574 worker = next_worker(workers); 568 worker = next_worker(workers);
575 569
576 if (!worker) { 570 if (!worker) {
@@ -584,7 +578,10 @@ again:
584 workers->num_workers_starting++; 578 workers->num_workers_starting++;
585 spin_unlock_irqrestore(&workers->lock, flags); 579 spin_unlock_irqrestore(&workers->lock, flags);
586 /* we're below the limit, start another worker */ 580 /* we're below the limit, start another worker */
587 __btrfs_start_workers(workers, 1); 581 ret = __btrfs_start_workers(workers);
582 spin_lock_irqsave(&workers->lock, flags);
583 if (ret)
584 goto fallback;
588 goto again; 585 goto again;
589 } 586 }
590 } 587 }
@@ -665,7 +662,7 @@ void btrfs_set_work_high_prio(struct btrfs_work *work)
665/* 662/*
666 * places a struct btrfs_work into the pending queue of one of the kthreads 663 * places a struct btrfs_work into the pending queue of one of the kthreads
667 */ 664 */
668int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) 665void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
669{ 666{
670 struct btrfs_worker_thread *worker; 667 struct btrfs_worker_thread *worker;
671 unsigned long flags; 668 unsigned long flags;
@@ -673,7 +670,7 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
673 670
674 /* don't requeue something already on a list */ 671 /* don't requeue something already on a list */
675 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) 672 if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
676 goto out; 673 return;
677 674
678 worker = find_worker(workers); 675 worker = find_worker(workers);
679 if (workers->ordered) { 676 if (workers->ordered) {
@@ -712,7 +709,4 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
712 if (wake) 709 if (wake)
713 wake_up_process(worker->task); 710 wake_up_process(worker->task);
714 spin_unlock_irqrestore(&worker->lock, flags); 711 spin_unlock_irqrestore(&worker->lock, flags);
715
716out:
717 return 0;
718} 712}
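
The reworked __btrfs_start_workers() above creates exactly one worker per call and returns the allocation or kthread error to its caller, undoing only the num_workers_starting reservation on failure. A rough userspace sketch of that start-one-and-report pattern follows; the pool/worker types and the pthread calls are stand-ins, not the btrfs structures. Pushing the error out this way lets mount-time setup fail cleanly instead of queueing work to a pool that silently has no threads.

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>

struct worker_pool {
	pthread_mutex_t lock;
	int num_workers;          /* workers actually running */
	int num_workers_starting; /* slots reserved but not yet running */
};

struct worker {
	pthread_t thread;
	struct worker_pool *pool;
};

static void *worker_loop(void *arg)
{
	/* a real pool would pull work items here; the sketch just exits */
	free(arg);
	return NULL;
}

/* Start exactly one worker; on any failure undo the "starting"
 * reservation and hand the error code back to the caller. */
static int pool_start_one(struct worker_pool *pool)
{
	struct worker *w = calloc(1, sizeof(*w));
	int ret;

	if (!w) {
		ret = -ENOMEM;
		goto fail;
	}
	w->pool = pool;
	ret = -pthread_create(&w->thread, NULL, worker_loop, w);
	if (ret) {
		free(w);
		goto fail;
	}
	pthread_mutex_lock(&pool->lock);
	pool->num_workers++;
	pool->num_workers_starting--;
	pthread_mutex_unlock(&pool->lock);
	return 0;
fail:
	pthread_mutex_lock(&pool->lock);
	pool->num_workers_starting--;
	pthread_mutex_unlock(&pool->lock);
	return ret;
}

int pool_start_worker(struct worker_pool *pool)
{
	pthread_mutex_lock(&pool->lock);
	pool->num_workers_starting++;
	pthread_mutex_unlock(&pool->lock);
	return pool_start_one(pool);
}
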
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 5077746cf85e..f34cc31fa3c9 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -109,8 +109,8 @@ struct btrfs_workers {
109 char *name; 109 char *name;
110}; 110};
111 111
112int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); 112void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work);
113int btrfs_start_workers(struct btrfs_workers *workers, int num_workers); 113int btrfs_start_workers(struct btrfs_workers *workers);
114int btrfs_stop_workers(struct btrfs_workers *workers); 114int btrfs_stop_workers(struct btrfs_workers *workers);
115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, 115void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
116 struct btrfs_workers *async_starter); 116 struct btrfs_workers *async_starter);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 50634abef9b4..67385033323d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2692,7 +2692,8 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2692int btrfs_readpage(struct file *file, struct page *page); 2692int btrfs_readpage(struct file *file, struct page *page);
2693void btrfs_evict_inode(struct inode *inode); 2693void btrfs_evict_inode(struct inode *inode);
2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); 2694int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
2695void btrfs_dirty_inode(struct inode *inode, int flags); 2695int btrfs_dirty_inode(struct inode *inode);
2696int btrfs_update_time(struct file *file);
2696struct inode *btrfs_alloc_inode(struct super_block *sb); 2697struct inode *btrfs_alloc_inode(struct super_block *sb);
2697void btrfs_destroy_inode(struct inode *inode); 2698void btrfs_destroy_inode(struct inode *inode);
2698int btrfs_drop_inode(struct inode *inode); 2699int btrfs_drop_inode(struct inode *inode);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5b163572e0ca..9c1eccc2c503 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -640,8 +640,8 @@ static int btrfs_delayed_inode_reserve_metadata(
640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since 640 * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
641 * we're accounted for. 641 * we're accounted for.
642 */ 642 */
643 if (!trans->bytes_reserved && 643 if (!src_rsv || (!trans->bytes_reserved &&
644 src_rsv != &root->fs_info->delalloc_block_rsv) { 644 src_rsv != &root->fs_info->delalloc_block_rsv)) {
645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); 645 ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
646 /* 646 /*
647 * Since we're under a transaction reserve_metadata_bytes could 647 * Since we're under a transaction reserve_metadata_bytes could
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 632f8f3cc9db..f99a099a7747 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1579,9 +1579,7 @@ static int cleaner_kthread(void *arg)
1579 btrfs_run_defrag_inodes(root->fs_info); 1579 btrfs_run_defrag_inodes(root->fs_info);
1580 } 1580 }
1581 1581
1582 if (freezing(current)) { 1582 if (!try_to_freeze()) {
1583 refrigerator();
1584 } else {
1585 set_current_state(TASK_INTERRUPTIBLE); 1583 set_current_state(TASK_INTERRUPTIBLE);
1586 if (!kthread_should_stop()) 1584 if (!kthread_should_stop())
1587 schedule(); 1585 schedule();
@@ -1635,9 +1633,7 @@ sleep:
1635 wake_up_process(root->fs_info->cleaner_kthread); 1633 wake_up_process(root->fs_info->cleaner_kthread);
1636 mutex_unlock(&root->fs_info->transaction_kthread_mutex); 1634 mutex_unlock(&root->fs_info->transaction_kthread_mutex);
1637 1635
1638 if (freezing(current)) { 1636 if (!try_to_freeze()) {
1639 refrigerator();
1640 } else {
1641 set_current_state(TASK_INTERRUPTIBLE); 1637 set_current_state(TASK_INTERRUPTIBLE);
1642 if (!kthread_should_stop() && 1638 if (!kthread_should_stop() &&
1643 !btrfs_transaction_blocked(root->fs_info)) 1639 !btrfs_transaction_blocked(root->fs_info))
@@ -2194,19 +2190,27 @@ struct btrfs_root *open_ctree(struct super_block *sb,
2194 fs_info->endio_meta_write_workers.idle_thresh = 2; 2190 fs_info->endio_meta_write_workers.idle_thresh = 2;
2195 fs_info->readahead_workers.idle_thresh = 2; 2191 fs_info->readahead_workers.idle_thresh = 2;
2196 2192
2197 btrfs_start_workers(&fs_info->workers, 1); 2193 /*
2198 btrfs_start_workers(&fs_info->generic_worker, 1); 2194 * btrfs_start_workers can really only fail because of ENOMEM so just
2199 btrfs_start_workers(&fs_info->submit_workers, 1); 2195 * return -ENOMEM if any of these fail.
2200 btrfs_start_workers(&fs_info->delalloc_workers, 1); 2196 */
2201 btrfs_start_workers(&fs_info->fixup_workers, 1); 2197 ret = btrfs_start_workers(&fs_info->workers);
2202 btrfs_start_workers(&fs_info->endio_workers, 1); 2198 ret |= btrfs_start_workers(&fs_info->generic_worker);
2203 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 2199 ret |= btrfs_start_workers(&fs_info->submit_workers);
2204 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 2200 ret |= btrfs_start_workers(&fs_info->delalloc_workers);
2205 btrfs_start_workers(&fs_info->endio_write_workers, 1); 2201 ret |= btrfs_start_workers(&fs_info->fixup_workers);
2206 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 2202 ret |= btrfs_start_workers(&fs_info->endio_workers);
2207 btrfs_start_workers(&fs_info->delayed_workers, 1); 2203 ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
2208 btrfs_start_workers(&fs_info->caching_workers, 1); 2204 ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
2209 btrfs_start_workers(&fs_info->readahead_workers, 1); 2205 ret |= btrfs_start_workers(&fs_info->endio_write_workers);
2206 ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
2207 ret |= btrfs_start_workers(&fs_info->delayed_workers);
2208 ret |= btrfs_start_workers(&fs_info->caching_workers);
2209 ret |= btrfs_start_workers(&fs_info->readahead_workers);
2210 if (ret) {
2211 ret = -ENOMEM;
2212 goto fail_sb_buffer;
2213 }
2210 2214
2211 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 2215 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
2212 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2216 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
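
As the new comment in open_ctree() notes, btrfs_start_workers() can realistically only fail with -ENOMEM, so the call sites OR every return code into one value and collapse it to a single -ENOMEM. A minimal standalone illustration of that aggregate-then-normalise idiom follows; the pool names and the simulated failure are invented.

/* Hypothetical sketch of the "ret |= start(...)" aggregation used above. */
#include <errno.h>
#include <stdio.h>

static int start_pool(const char *name)
{
	/* pretend the second pool fails to start */
	return (name[0] == 'b') ? -ENOMEM : 0;
}

int start_all_pools(void)
{
	int ret = 0;

	ret |= start_pool("alpha");
	ret |= start_pool("beta");
	ret |= start_pool("gamma");
	/* Any nonzero (negative) code makes ret nonzero; the exact value is
	 * meaningless after OR-ing, so it is normalised to one error code. */
	if (ret)
		return -ENOMEM;
	return 0;
}

int main(void)
{
	printf("start_all_pools() = %d\n", start_all_pools());
	return 0;
}
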
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2ad813674d77..f5fbe576d2ba 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2822,7 +2822,7 @@ out_free:
2822 btrfs_release_path(path); 2822 btrfs_release_path(path);
2823out: 2823out:
2824 spin_lock(&block_group->lock); 2824 spin_lock(&block_group->lock);
2825 if (!ret) 2825 if (!ret && dcs == BTRFS_DC_SETUP)
2826 block_group->cache_generation = trans->transid; 2826 block_group->cache_generation = trans->transid;
2827 block_group->disk_cache_state = dcs; 2827 block_group->disk_cache_state = dcs;
2828 spin_unlock(&block_group->lock); 2828 spin_unlock(&block_group->lock);
@@ -4204,12 +4204,17 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4204 struct btrfs_root *root = BTRFS_I(inode)->root; 4204 struct btrfs_root *root = BTRFS_I(inode)->root;
4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 4205 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
4206 u64 to_reserve = 0; 4206 u64 to_reserve = 0;
4207 u64 csum_bytes;
4207 unsigned nr_extents = 0; 4208 unsigned nr_extents = 0;
4209 int extra_reserve = 0;
4208 int flush = 1; 4210 int flush = 1;
4209 int ret; 4211 int ret;
4210 4212
4213 /* Need to be holding the i_mutex here if we aren't free space cache */
4211 if (btrfs_is_free_space_inode(root, inode)) 4214 if (btrfs_is_free_space_inode(root, inode))
4212 flush = 0; 4215 flush = 0;
4216 else
4217 WARN_ON(!mutex_is_locked(&inode->i_mutex));
4213 4218
4214 if (flush && btrfs_transaction_in_commit(root->fs_info)) 4219 if (flush && btrfs_transaction_in_commit(root->fs_info))
4215 schedule_timeout(1); 4220 schedule_timeout(1);
@@ -4220,11 +4225,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4220 BTRFS_I(inode)->outstanding_extents++; 4225 BTRFS_I(inode)->outstanding_extents++;
4221 4226
4222 if (BTRFS_I(inode)->outstanding_extents > 4227 if (BTRFS_I(inode)->outstanding_extents >
4223 BTRFS_I(inode)->reserved_extents) { 4228 BTRFS_I(inode)->reserved_extents)
4224 nr_extents = BTRFS_I(inode)->outstanding_extents - 4229 nr_extents = BTRFS_I(inode)->outstanding_extents -
4225 BTRFS_I(inode)->reserved_extents; 4230 BTRFS_I(inode)->reserved_extents;
4226 BTRFS_I(inode)->reserved_extents += nr_extents;
4227 }
4228 4231
4229 /* 4232 /*
4230 * Add an item to reserve for updating the inode when we complete the 4233 * Add an item to reserve for updating the inode when we complete the
@@ -4232,11 +4235,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4232 */ 4235 */
4233 if (!BTRFS_I(inode)->delalloc_meta_reserved) { 4236 if (!BTRFS_I(inode)->delalloc_meta_reserved) {
4234 nr_extents++; 4237 nr_extents++;
4235 BTRFS_I(inode)->delalloc_meta_reserved = 1; 4238 extra_reserve = 1;
4236 } 4239 }
4237 4240
4238 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4241 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
4239 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); 4242 to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
4243 csum_bytes = BTRFS_I(inode)->csum_bytes;
4240 spin_unlock(&BTRFS_I(inode)->lock); 4244 spin_unlock(&BTRFS_I(inode)->lock);
4241 4245
4242 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4246 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
@@ -4246,22 +4250,35 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4246 4250
4247 spin_lock(&BTRFS_I(inode)->lock); 4251 spin_lock(&BTRFS_I(inode)->lock);
4248 dropped = drop_outstanding_extent(inode); 4252 dropped = drop_outstanding_extent(inode);
4249 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4250 spin_unlock(&BTRFS_I(inode)->lock);
4251 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4252
4253 /* 4253 /*
4254 * Somebody could have come in and twiddled with the 4254 * If the inodes csum_bytes is the same as the original
4255 * reservation, so if we have to free more than we would have 4255 * csum_bytes then we know we haven't raced with any free()ers
4256 * reserved from this reservation go ahead and release those 4256 * so we can just reduce our inodes csum bytes and carry on.
4257 * bytes. 4257 * Otherwise we have to do the normal free thing to account for
4258 * the case that the free side didn't free up its reserve
4259 * because of this outstanding reservation.
4258 */ 4260 */
4259 to_free -= to_reserve; 4261 if (BTRFS_I(inode)->csum_bytes == csum_bytes)
4262 calc_csum_metadata_size(inode, num_bytes, 0);
4263 else
4264 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4265 spin_unlock(&BTRFS_I(inode)->lock);
4266 if (dropped)
4267 to_free += btrfs_calc_trans_metadata_size(root, dropped);
4268
4260 if (to_free) 4269 if (to_free)
4261 btrfs_block_rsv_release(root, block_rsv, to_free); 4270 btrfs_block_rsv_release(root, block_rsv, to_free);
4262 return ret; 4271 return ret;
4263 } 4272 }
4264 4273
4274 spin_lock(&BTRFS_I(inode)->lock);
4275 if (extra_reserve) {
4276 BTRFS_I(inode)->delalloc_meta_reserved = 1;
4277 nr_extents--;
4278 }
4279 BTRFS_I(inode)->reserved_extents += nr_extents;
4280 spin_unlock(&BTRFS_I(inode)->lock);
4281
4265 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4282 block_rsv_add_bytes(block_rsv, to_reserve, 1);
4266 4283
4267 return 0; 4284 return 0;
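
The failure path in btrfs_delalloc_reserve_metadata() now snapshots csum_bytes while still holding the inode spinlock, drops the lock for the reservation attempt, and on error compares the snapshot against the current value to decide whether a racing free already settled the accounting. A reduced, hypothetical sketch of that snapshot/recheck pattern is below, with the accounting shrunk to a single counter and the names invented; the lock is assumed to be initialised by whoever creates the struct.

#include <pthread.h>

struct acct {
	pthread_mutex_t lock;
	unsigned long csum_bytes;
};

/* Returns how many bytes the caller must release itself: 0 if nobody
 * raced with us (we quietly trimmed our own share), or the full amount
 * if the counter changed while the lock was dropped. */
unsigned long undo_reservation(struct acct *a, unsigned long my_bytes)
{
	unsigned long snapshot;
	unsigned long to_free;

	pthread_mutex_lock(&a->lock);
	snapshot = a->csum_bytes;
	pthread_mutex_unlock(&a->lock);

	/* ... slow work that can fail happens here, lock dropped ... */

	pthread_mutex_lock(&a->lock);
	if (a->csum_bytes == snapshot) {
		/* no racer: drop our share from the shared counter */
		a->csum_bytes -= my_bytes;
		to_free = 0;
	} else {
		/* a racer changed the counter; take the conservative path */
		to_free = my_bytes;
	}
	pthread_mutex_unlock(&a->lock);
	return to_free;
}
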
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index dafdfa059bf6..97fbe939c050 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1167,6 +1167,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / 1167 nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / 1168 PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
1169 (sizeof(struct page *))); 1169 (sizeof(struct page *)));
1170 nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
1171 nrptrs = max(nrptrs, 8);
1170 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 1172 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
1171 if (!pages) 1173 if (!pages)
1172 return -ENOMEM; 1174 return -ENOMEM;
@@ -1387,7 +1389,11 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1387 goto out; 1389 goto out;
1388 } 1390 }
1389 1391
1390 file_update_time(file); 1392 err = btrfs_update_time(file);
1393 if (err) {
1394 mutex_unlock(&inode->i_mutex);
1395 goto out;
1396 }
1391 BTRFS_I(inode)->sequence++; 1397 BTRFS_I(inode)->sequence++;
1392 1398
1393 start_pos = round_down(pos, root->sectorsize); 1399 start_pos = round_down(pos, root->sectorsize);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index ec23d43d0c35..9a897bf79538 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -423,7 +423,7 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
423 } 423 }
424 424
425 if (index == 0) 425 if (index == 0)
426 offset = sizeof(u32) * io_ctl->num_pages;; 426 offset = sizeof(u32) * io_ctl->num_pages;
427 427
428 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc, 428 crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
429 PAGE_CACHE_SIZE - offset); 429 PAGE_CACHE_SIZE - offset);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2c984f7d4c2a..81b235a61f8c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -38,6 +38,7 @@
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/mount.h>
41#include "compat.h" 42#include "compat.h"
42#include "ctree.h" 43#include "ctree.h"
43#include "disk-io.h" 44#include "disk-io.h"
@@ -1943,7 +1944,7 @@ enum btrfs_orphan_cleanup_state {
1943}; 1944};
1944 1945
1945/* 1946/*
1946 * This is called in transaction commmit time. If there are no orphan 1947 * This is called in transaction commit time. If there are no orphan
1947 * files in the subvolume, it removes orphan item and frees block_rsv 1948 * files in the subvolume, it removes orphan item and frees block_rsv
1948 * structure. 1949 * structure.
1949 */ 1950 */
@@ -2031,7 +2032,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
2031 /* insert an orphan item to track this unlinked/truncated file */ 2032 /* insert an orphan item to track this unlinked/truncated file */
2032 if (insert >= 1) { 2033 if (insert >= 1) {
2033 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); 2034 ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
2034 BUG_ON(ret); 2035 BUG_ON(ret && ret != -EEXIST);
2035 } 2036 }
2036 2037
2037 /* insert an orphan item to track subvolume contains orphan files */ 2038 /* insert an orphan item to track subvolume contains orphan files */
@@ -2158,6 +2159,38 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2158 if (ret && ret != -ESTALE) 2159 if (ret && ret != -ESTALE)
2159 goto out; 2160 goto out;
2160 2161
2162 if (ret == -ESTALE && root == root->fs_info->tree_root) {
2163 struct btrfs_root *dead_root;
2164 struct btrfs_fs_info *fs_info = root->fs_info;
2165 int is_dead_root = 0;
2166
2167 /*
2168 * this is an orphan in the tree root. Currently these
2169 * could come from 2 sources:
2170 * a) a snapshot deletion in progress
2171 * b) a free space cache inode
2172 * We need to distinguish those two, as the snapshot
2173 * orphan must not get deleted.
2174 * find_dead_roots already ran before us, so if this
2175 * is a snapshot deletion, we should find the root
2176 * in the dead_roots list
2177 */
2178 spin_lock(&fs_info->trans_lock);
2179 list_for_each_entry(dead_root, &fs_info->dead_roots,
2180 root_list) {
2181 if (dead_root->root_key.objectid ==
2182 found_key.objectid) {
2183 is_dead_root = 1;
2184 break;
2185 }
2186 }
2187 spin_unlock(&fs_info->trans_lock);
2188 if (is_dead_root) {
2189 /* prevent this orphan from being found again */
2190 key.offset = found_key.objectid - 1;
2191 continue;
2192 }
2193 }
2161 /* 2194 /*
2162 * Inode is already gone but the orphan item is still there, 2195 * Inode is already gone but the orphan item is still there,
2163 * kill the orphan item. 2196 * kill the orphan item.
@@ -2191,7 +2224,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2191 continue; 2224 continue;
2192 } 2225 }
2193 nr_truncate++; 2226 nr_truncate++;
2227 /*
2228 * Need to hold the imutex for reservation purposes, not
2229 * a huge deal here but I have a WARN_ON in
2230 * btrfs_delalloc_reserve_space to catch offenders.
2231 */
2232 mutex_lock(&inode->i_mutex);
2194 ret = btrfs_truncate(inode); 2233 ret = btrfs_truncate(inode);
2234 mutex_unlock(&inode->i_mutex);
2195 } else { 2235 } else {
2196 nr_unlink++; 2236 nr_unlink++;
2197 } 2237 }
@@ -3327,7 +3367,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3327 u64 hint_byte = 0; 3367 u64 hint_byte = 0;
3328 hole_size = last_byte - cur_offset; 3368 hole_size = last_byte - cur_offset;
3329 3369
3330 trans = btrfs_start_transaction(root, 2); 3370 trans = btrfs_start_transaction(root, 3);
3331 if (IS_ERR(trans)) { 3371 if (IS_ERR(trans)) {
3332 err = PTR_ERR(trans); 3372 err = PTR_ERR(trans);
3333 break; 3373 break;
@@ -3337,6 +3377,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3337 cur_offset + hole_size, 3377 cur_offset + hole_size,
3338 &hint_byte, 1); 3378 &hint_byte, 1);
3339 if (err) { 3379 if (err) {
3380 btrfs_update_inode(trans, root, inode);
3340 btrfs_end_transaction(trans, root); 3381 btrfs_end_transaction(trans, root);
3341 break; 3382 break;
3342 } 3383 }
@@ -3346,6 +3387,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3346 0, hole_size, 0, hole_size, 3387 0, hole_size, 0, hole_size,
3347 0, 0, 0); 3388 0, 0, 0);
3348 if (err) { 3389 if (err) {
3390 btrfs_update_inode(trans, root, inode);
3349 btrfs_end_transaction(trans, root); 3391 btrfs_end_transaction(trans, root);
3350 break; 3392 break;
3351 } 3393 }
@@ -3353,6 +3395,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3353 btrfs_drop_extent_cache(inode, hole_start, 3395 btrfs_drop_extent_cache(inode, hole_start,
3354 last_byte - 1, 0); 3396 last_byte - 1, 0);
3355 3397
3398 btrfs_update_inode(trans, root, inode);
3356 btrfs_end_transaction(trans, root); 3399 btrfs_end_transaction(trans, root);
3357 } 3400 }
3358 free_extent_map(em); 3401 free_extent_map(em);
@@ -3370,6 +3413,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3370 3413
3371static int btrfs_setsize(struct inode *inode, loff_t newsize) 3414static int btrfs_setsize(struct inode *inode, loff_t newsize)
3372{ 3415{
3416 struct btrfs_root *root = BTRFS_I(inode)->root;
3417 struct btrfs_trans_handle *trans;
3373 loff_t oldsize = i_size_read(inode); 3418 loff_t oldsize = i_size_read(inode);
3374 int ret; 3419 int ret;
3375 3420
@@ -3377,16 +3422,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3377 return 0; 3422 return 0;
3378 3423
3379 if (newsize > oldsize) { 3424 if (newsize > oldsize) {
3380 i_size_write(inode, newsize);
3381 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3382 truncate_pagecache(inode, oldsize, newsize); 3425 truncate_pagecache(inode, oldsize, newsize);
3383 ret = btrfs_cont_expand(inode, oldsize, newsize); 3426 ret = btrfs_cont_expand(inode, oldsize, newsize);
3384 if (ret) { 3427 if (ret)
3385 btrfs_setsize(inode, oldsize);
3386 return ret; 3428 return ret;
3387 }
3388 3429
3389 mark_inode_dirty(inode); 3430 trans = btrfs_start_transaction(root, 1);
3431 if (IS_ERR(trans))
3432 return PTR_ERR(trans);
3433
3434 i_size_write(inode, newsize);
3435 btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
3436 ret = btrfs_update_inode(trans, root, inode);
3437 btrfs_end_transaction_throttle(trans, root);
3390 } else { 3438 } else {
3391 3439
3392 /* 3440 /*
@@ -3426,9 +3474,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3426 3474
3427 if (attr->ia_valid) { 3475 if (attr->ia_valid) {
3428 setattr_copy(inode, attr); 3476 setattr_copy(inode, attr);
3429 mark_inode_dirty(inode); 3477 err = btrfs_dirty_inode(inode);
3430 3478
3431 if (attr->ia_valid & ATTR_MODE) 3479 if (!err && attr->ia_valid & ATTR_MODE)
3432 err = btrfs_acl_chmod(inode); 3480 err = btrfs_acl_chmod(inode);
3433 } 3481 }
3434 3482
@@ -4204,42 +4252,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4204 * FIXME, needs more benchmarking...there are no reasons other than performance 4252 * FIXME, needs more benchmarking...there are no reasons other than performance
4205 * to keep or drop this code. 4253 * to keep or drop this code.
4206 */ 4254 */
4207void btrfs_dirty_inode(struct inode *inode, int flags) 4255int btrfs_dirty_inode(struct inode *inode)
4208{ 4256{
4209 struct btrfs_root *root = BTRFS_I(inode)->root; 4257 struct btrfs_root *root = BTRFS_I(inode)->root;
4210 struct btrfs_trans_handle *trans; 4258 struct btrfs_trans_handle *trans;
4211 int ret; 4259 int ret;
4212 4260
4213 if (BTRFS_I(inode)->dummy_inode) 4261 if (BTRFS_I(inode)->dummy_inode)
4214 return; 4262 return 0;
4215 4263
4216 trans = btrfs_join_transaction(root); 4264 trans = btrfs_join_transaction(root);
4217 BUG_ON(IS_ERR(trans)); 4265 if (IS_ERR(trans))
4266 return PTR_ERR(trans);
4218 4267
4219 ret = btrfs_update_inode(trans, root, inode); 4268 ret = btrfs_update_inode(trans, root, inode);
4220 if (ret && ret == -ENOSPC) { 4269 if (ret && ret == -ENOSPC) {
4221 /* whoops, lets try again with the full transaction */ 4270 /* whoops, lets try again with the full transaction */
4222 btrfs_end_transaction(trans, root); 4271 btrfs_end_transaction(trans, root);
4223 trans = btrfs_start_transaction(root, 1); 4272 trans = btrfs_start_transaction(root, 1);
4224 if (IS_ERR(trans)) { 4273 if (IS_ERR(trans))
4225 printk_ratelimited(KERN_ERR "btrfs: fail to " 4274 return PTR_ERR(trans);
4226 "dirty inode %llu error %ld\n",
4227 (unsigned long long)btrfs_ino(inode),
4228 PTR_ERR(trans));
4229 return;
4230 }
4231 4275
4232 ret = btrfs_update_inode(trans, root, inode); 4276 ret = btrfs_update_inode(trans, root, inode);
4233 if (ret) {
4234 printk_ratelimited(KERN_ERR "btrfs: fail to "
4235 "dirty inode %llu error %d\n",
4236 (unsigned long long)btrfs_ino(inode),
4237 ret);
4238 }
4239 } 4277 }
4240 btrfs_end_transaction(trans, root); 4278 btrfs_end_transaction(trans, root);
4241 if (BTRFS_I(inode)->delayed_node) 4279 if (BTRFS_I(inode)->delayed_node)
4242 btrfs_balance_delayed_items(root); 4280 btrfs_balance_delayed_items(root);
4281
4282 return ret;
4283}
4284
4285/*
4286 * This is a copy of file_update_time. We need this so we can return error on
4287 * ENOSPC for updating the inode in the case of file write and mmap writes.
4288 */
4289int btrfs_update_time(struct file *file)
4290{
4291 struct inode *inode = file->f_path.dentry->d_inode;
4292 struct timespec now;
4293 int ret;
4294 enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
4295
4296 /* First try to exhaust all avenues to not sync */
4297 if (IS_NOCMTIME(inode))
4298 return 0;
4299
4300 now = current_fs_time(inode->i_sb);
4301 if (!timespec_equal(&inode->i_mtime, &now))
4302 sync_it = S_MTIME;
4303
4304 if (!timespec_equal(&inode->i_ctime, &now))
4305 sync_it |= S_CTIME;
4306
4307 if (IS_I_VERSION(inode))
4308 sync_it |= S_VERSION;
4309
4310 if (!sync_it)
4311 return 0;
4312
4313 /* Finally allowed to write? Takes lock. */
4314 if (mnt_want_write_file(file))
4315 return 0;
4316
4317 /* Only change inode inside the lock region */
4318 if (sync_it & S_VERSION)
4319 inode_inc_iversion(inode);
4320 if (sync_it & S_CTIME)
4321 inode->i_ctime = now;
4322 if (sync_it & S_MTIME)
4323 inode->i_mtime = now;
4324 ret = btrfs_dirty_inode(inode);
4325 if (!ret)
4326 mark_inode_dirty_sync(inode);
4327 mnt_drop_write(file->f_path.mnt);
4328 return ret;
4243} 4329}
4244 4330
4245/* 4331/*
@@ -4326,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
4326 struct btrfs_root *root, 4412 struct btrfs_root *root,
4327 struct inode *dir, 4413 struct inode *dir,
4328 const char *name, int name_len, 4414 const char *name, int name_len,
4329 u64 ref_objectid, u64 objectid, int mode, 4415 u64 ref_objectid, u64 objectid,
4330 u64 *index) 4416 umode_t mode, u64 *index)
4331{ 4417{
4332 struct inode *inode; 4418 struct inode *inode;
4333 struct btrfs_inode_item *inode_item; 4419 struct btrfs_inode_item *inode_item;
@@ -4504,17 +4590,13 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
4504 int err = btrfs_add_link(trans, dir, inode, 4590 int err = btrfs_add_link(trans, dir, inode,
4505 dentry->d_name.name, dentry->d_name.len, 4591 dentry->d_name.name, dentry->d_name.len,
4506 backref, index); 4592 backref, index);
4507 if (!err) {
4508 d_instantiate(dentry, inode);
4509 return 0;
4510 }
4511 if (err > 0) 4593 if (err > 0)
4512 err = -EEXIST; 4594 err = -EEXIST;
4513 return err; 4595 return err;
4514} 4596}
4515 4597
4516static int btrfs_mknod(struct inode *dir, struct dentry *dentry, 4598static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4517 int mode, dev_t rdev) 4599 umode_t mode, dev_t rdev)
4518{ 4600{
4519 struct btrfs_trans_handle *trans; 4601 struct btrfs_trans_handle *trans;
4520 struct btrfs_root *root = BTRFS_I(dir)->root; 4602 struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -4555,13 +4637,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4555 goto out_unlock; 4637 goto out_unlock;
4556 } 4638 }
4557 4639
4640 /*
4641 * If the active LSM wants to access the inode during
4642 * d_instantiate it needs these. Smack checks to see
4643 * if the filesystem supports xattrs by looking at the
4644 * ops vector.
4645 */
4646
4647 inode->i_op = &btrfs_special_inode_operations;
4558 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4648 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4559 if (err) 4649 if (err)
4560 drop_inode = 1; 4650 drop_inode = 1;
4561 else { 4651 else {
4562 inode->i_op = &btrfs_special_inode_operations;
4563 init_special_inode(inode, inode->i_mode, rdev); 4652 init_special_inode(inode, inode->i_mode, rdev);
4564 btrfs_update_inode(trans, root, inode); 4653 btrfs_update_inode(trans, root, inode);
4654 d_instantiate(dentry, inode);
4565 } 4655 }
4566out_unlock: 4656out_unlock:
4567 nr = trans->blocks_used; 4657 nr = trans->blocks_used;
@@ -4575,7 +4665,7 @@ out_unlock:
4575} 4665}
4576 4666
4577static int btrfs_create(struct inode *dir, struct dentry *dentry, 4667static int btrfs_create(struct inode *dir, struct dentry *dentry,
4578 int mode, struct nameidata *nd) 4668 umode_t mode, struct nameidata *nd)
4579{ 4669{
4580 struct btrfs_trans_handle *trans; 4670 struct btrfs_trans_handle *trans;
4581 struct btrfs_root *root = BTRFS_I(dir)->root; 4671 struct btrfs_root *root = BTRFS_I(dir)->root;
@@ -4613,15 +4703,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4613 goto out_unlock; 4703 goto out_unlock;
4614 } 4704 }
4615 4705
4706 /*
4707 * If the active LSM wants to access the inode during
4708 * d_instantiate it needs these. Smack checks to see
4709 * if the filesystem supports xattrs by looking at the
4710 * ops vector.
4711 */
4712 inode->i_fop = &btrfs_file_operations;
4713 inode->i_op = &btrfs_file_inode_operations;
4714
4616 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 4715 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
4617 if (err) 4716 if (err)
4618 drop_inode = 1; 4717 drop_inode = 1;
4619 else { 4718 else {
4620 inode->i_mapping->a_ops = &btrfs_aops; 4719 inode->i_mapping->a_ops = &btrfs_aops;
4621 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 4720 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
4622 inode->i_fop = &btrfs_file_operations;
4623 inode->i_op = &btrfs_file_inode_operations;
4624 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 4721 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
4722 d_instantiate(dentry, inode);
4625 } 4723 }
4626out_unlock: 4724out_unlock:
4627 nr = trans->blocks_used; 4725 nr = trans->blocks_used;
@@ -4679,6 +4777,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4679 struct dentry *parent = dentry->d_parent; 4777 struct dentry *parent = dentry->d_parent;
4680 err = btrfs_update_inode(trans, root, inode); 4778 err = btrfs_update_inode(trans, root, inode);
4681 BUG_ON(err); 4779 BUG_ON(err);
4780 d_instantiate(dentry, inode);
4682 btrfs_log_new_name(trans, inode, NULL, parent); 4781 btrfs_log_new_name(trans, inode, NULL, parent);
4683 } 4782 }
4684 4783
@@ -4693,7 +4792,7 @@ fail:
4693 return err; 4792 return err;
4694} 4793}
4695 4794
4696static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 4795static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4697{ 4796{
4698 struct inode *inode = NULL; 4797 struct inode *inode = NULL;
4699 struct btrfs_trans_handle *trans; 4798 struct btrfs_trans_handle *trans;
@@ -6303,7 +6402,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
6303 u64 page_start; 6402 u64 page_start;
6304 u64 page_end; 6403 u64 page_end;
6305 6404
6405 /* Need this to keep space reservations serialized */
6406 mutex_lock(&inode->i_mutex);
6306 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); 6407 ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
6408 mutex_unlock(&inode->i_mutex);
6409 if (!ret)
6410 ret = btrfs_update_time(vma->vm_file);
6307 if (ret) { 6411 if (ret) {
6308 if (ret == -ENOMEM) 6412 if (ret == -ENOMEM)
6309 ret = VM_FAULT_OOM; 6413 ret = VM_FAULT_OOM;
@@ -6515,8 +6619,9 @@ static int btrfs_truncate(struct inode *inode)
6515 /* Just need the 1 for updating the inode */ 6619 /* Just need the 1 for updating the inode */
6516 trans = btrfs_start_transaction(root, 1); 6620 trans = btrfs_start_transaction(root, 1);
6517 if (IS_ERR(trans)) { 6621 if (IS_ERR(trans)) {
6518 err = PTR_ERR(trans); 6622 ret = err = PTR_ERR(trans);
6519 goto out; 6623 trans = NULL;
6624 break;
6520 } 6625 }
6521 } 6626 }
6522 6627
@@ -6656,7 +6761,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
6656static void btrfs_i_callback(struct rcu_head *head) 6761static void btrfs_i_callback(struct rcu_head *head)
6657{ 6762{
6658 struct inode *inode = container_of(head, struct inode, i_rcu); 6763 struct inode *inode = container_of(head, struct inode, i_rcu);
6659 INIT_LIST_HEAD(&inode->i_dentry);
6660 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); 6764 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
6661} 6765}
6662 6766
@@ -7076,14 +7180,21 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7076 goto out_unlock; 7180 goto out_unlock;
7077 } 7181 }
7078 7182
7183 /*
7184 * If the active LSM wants to access the inode during
7185 * d_instantiate it needs these. Smack checks to see
7186 * if the filesystem supports xattrs by looking at the
7187 * ops vector.
7188 */
7189 inode->i_fop = &btrfs_file_operations;
7190 inode->i_op = &btrfs_file_inode_operations;
7191
7079 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); 7192 err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
7080 if (err) 7193 if (err)
7081 drop_inode = 1; 7194 drop_inode = 1;
7082 else { 7195 else {
7083 inode->i_mapping->a_ops = &btrfs_aops; 7196 inode->i_mapping->a_ops = &btrfs_aops;
7084 inode->i_mapping->backing_dev_info = &root->fs_info->bdi; 7197 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
7085 inode->i_fop = &btrfs_file_operations;
7086 inode->i_op = &btrfs_file_inode_operations;
7087 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; 7198 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
7088 } 7199 }
7089 if (drop_inode) 7200 if (drop_inode)
@@ -7132,6 +7243,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7132 drop_inode = 1; 7243 drop_inode = 1;
7133 7244
7134out_unlock: 7245out_unlock:
7246 if (!err)
7247 d_instantiate(dentry, inode);
7135 nr = trans->blocks_used; 7248 nr = trans->blocks_used;
7136 btrfs_end_transaction_throttle(trans, root); 7249 btrfs_end_transaction_throttle(trans, root);
7137 if (drop_inode) { 7250 if (drop_inode) {
@@ -7353,6 +7466,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7353 .follow_link = page_follow_link_light, 7466 .follow_link = page_follow_link_light,
7354 .put_link = page_put_link, 7467 .put_link = page_put_link,
7355 .getattr = btrfs_getattr, 7468 .getattr = btrfs_getattr,
7469 .setattr = btrfs_setattr,
7356 .permission = btrfs_permission, 7470 .permission = btrfs_permission,
7357 .setxattr = btrfs_setxattr, 7471 .setxattr = btrfs_setxattr,
7358 .getxattr = btrfs_getxattr, 7472 .getxattr = btrfs_getxattr,
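
btrfs_update_time() is, per its own comment, a copy of file_update_time() whose point is that the error from dirtying the inode (typically -ENOSPC) reaches the write and page-mkwrite callers. The skeleton of such a helper, reduced to a hypothetical userspace function where dirty_cb stands in for btrfs_dirty_inode(), looks roughly like this; the cheap equality checks up front mirror the "first try to exhaust all avenues to not sync" comment, since if neither timestamp changed nothing is dirtied and no error can occur.

#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <time.h>

struct fake_inode {
	struct timespec mtime;
	struct timespec ctime;
};

static bool ts_equal(const struct timespec *a, const struct timespec *b)
{
	return a->tv_sec == b->tv_sec && a->tv_nsec == b->tv_nsec;
}

/* dirty_cb stands in for the filesystem hook that may fail, e.g. with
 * -ENOSPC; its error is what this helper exists to propagate. */
int update_time(struct fake_inode *inode,
		int (*dirty_cb)(struct fake_inode *))
{
	struct timespec now;
	bool sync_mtime, sync_ctime;

	clock_gettime(CLOCK_REALTIME, &now);
	sync_mtime = !ts_equal(&inode->mtime, &now);
	sync_ctime = !ts_equal(&inode->ctime, &now);

	/* first try to exhaust all avenues to not sync */
	if (!sync_mtime && !sync_ctime)
		return 0;

	if (sync_ctime)
		inode->ctime = now;
	if (sync_mtime)
		inode->mtime = now;
	return dirty_cb(inode);	/* the caller finally sees the error */
}
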
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 72d461656f60..5441ff1480fd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -201,7 +201,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
201 } 201 }
202 } 202 }
203 203
204 ret = mnt_want_write(file->f_path.mnt); 204 ret = mnt_want_write_file(file);
205 if (ret) 205 if (ret)
206 goto out_unlock; 206 goto out_unlock;
207 207
@@ -252,14 +252,14 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
252 trans = btrfs_join_transaction(root); 252 trans = btrfs_join_transaction(root);
253 BUG_ON(IS_ERR(trans)); 253 BUG_ON(IS_ERR(trans));
254 254
255 btrfs_update_iflags(inode);
256 inode->i_ctime = CURRENT_TIME;
255 ret = btrfs_update_inode(trans, root, inode); 257 ret = btrfs_update_inode(trans, root, inode);
256 BUG_ON(ret); 258 BUG_ON(ret);
257 259
258 btrfs_update_iflags(inode);
259 inode->i_ctime = CURRENT_TIME;
260 btrfs_end_transaction(trans, root); 260 btrfs_end_transaction(trans, root);
261 261
262 mnt_drop_write(file->f_path.mnt); 262 mnt_drop_write_file(file);
263 263
264 ret = 0; 264 ret = 0;
265 out_unlock: 265 out_unlock:
@@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
858 return 0; 858 return 0;
859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 859 file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
860 860
861 mutex_lock(&inode->i_mutex);
861 ret = btrfs_delalloc_reserve_space(inode, 862 ret = btrfs_delalloc_reserve_space(inode,
862 num_pages << PAGE_CACHE_SHIFT); 863 num_pages << PAGE_CACHE_SHIFT);
864 mutex_unlock(&inode->i_mutex);
863 if (ret) 865 if (ret)
864 return ret; 866 return ret;
865again: 867again:
@@ -1853,7 +1855,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1853 goto out; 1855 goto out;
1854 } 1856 }
1855 1857
1856 err = mnt_want_write(file->f_path.mnt); 1858 err = mnt_want_write_file(file);
1857 if (err) 1859 if (err)
1858 goto out; 1860 goto out;
1859 1861
@@ -1969,7 +1971,7 @@ out_dput:
1969 dput(dentry); 1971 dput(dentry);
1970out_unlock_dir: 1972out_unlock_dir:
1971 mutex_unlock(&dir->i_mutex); 1973 mutex_unlock(&dir->i_mutex);
1972 mnt_drop_write(file->f_path.mnt); 1974 mnt_drop_write_file(file);
1973out: 1975out:
1974 kfree(vol_args); 1976 kfree(vol_args);
1975 return err; 1977 return err;
@@ -1985,7 +1987,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
1985 if (btrfs_root_readonly(root)) 1987 if (btrfs_root_readonly(root))
1986 return -EROFS; 1988 return -EROFS;
1987 1989
1988 ret = mnt_want_write(file->f_path.mnt); 1990 ret = mnt_want_write_file(file);
1989 if (ret) 1991 if (ret)
1990 return ret; 1992 return ret;
1991 1993
@@ -2038,7 +2040,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2038 ret = -EINVAL; 2040 ret = -EINVAL;
2039 } 2041 }
2040out: 2042out:
2041 mnt_drop_write(file->f_path.mnt); 2043 mnt_drop_write_file(file);
2042 return ret; 2044 return ret;
2043} 2045}
2044 2046
@@ -2193,7 +2195,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
2193 if (btrfs_root_readonly(root)) 2195 if (btrfs_root_readonly(root))
2194 return -EROFS; 2196 return -EROFS;
2195 2197
2196 ret = mnt_want_write(file->f_path.mnt); 2198 ret = mnt_want_write_file(file);
2197 if (ret) 2199 if (ret)
2198 return ret; 2200 return ret;
2199 2201
@@ -2508,7 +2510,7 @@ out_unlock:
2508out_fput: 2510out_fput:
2509 fput(src_file); 2511 fput(src_file);
2510out_drop_write: 2512out_drop_write:
2511 mnt_drop_write(file->f_path.mnt); 2513 mnt_drop_write_file(file);
2512 return ret; 2514 return ret;
2513} 2515}
2514 2516
@@ -2547,7 +2549,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2547 if (btrfs_root_readonly(root)) 2549 if (btrfs_root_readonly(root))
2548 goto out; 2550 goto out;
2549 2551
2550 ret = mnt_want_write(file->f_path.mnt); 2552 ret = mnt_want_write_file(file);
2551 if (ret) 2553 if (ret)
2552 goto out; 2554 goto out;
2553 2555
@@ -2563,7 +2565,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
2563 2565
2564out_drop: 2566out_drop:
2565 atomic_dec(&root->fs_info->open_ioctl_trans); 2567 atomic_dec(&root->fs_info->open_ioctl_trans);
2566 mnt_drop_write(file->f_path.mnt); 2568 mnt_drop_write_file(file);
2567out: 2569out:
2568 return ret; 2570 return ret;
2569} 2571}
@@ -2798,7 +2800,7 @@ long btrfs_ioctl_trans_end(struct file *file)
2798 2800
2799 atomic_dec(&root->fs_info->open_ioctl_trans); 2801 atomic_dec(&root->fs_info->open_ioctl_trans);
2800 2802
2801 mnt_drop_write(file->f_path.mnt); 2803 mnt_drop_write_file(file);
2802 return 0; 2804 return 0;
2803} 2805}
2804 2806
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index dff29d5e151a..cfb55434a469 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2947,7 +2947,9 @@ static int relocate_file_extent_cluster(struct inode *inode,
2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 2947 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 2948 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2949 while (index <= last_index) { 2949 while (index <= last_index) {
2950 mutex_lock(&inode->i_mutex);
2950 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); 2951 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2952 mutex_unlock(&inode->i_mutex);
2951 if (ret) 2953 if (ret)
2952 goto out; 2954 goto out;
2953 2955
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c27bcb67f330..ddf2c90d3fc0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1535,18 +1535,22 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev)
1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) 1535static noinline_for_stack int scrub_workers_get(struct btrfs_root *root)
1536{ 1536{
1537 struct btrfs_fs_info *fs_info = root->fs_info; 1537 struct btrfs_fs_info *fs_info = root->fs_info;
1538 int ret = 0;
1538 1539
1539 mutex_lock(&fs_info->scrub_lock); 1540 mutex_lock(&fs_info->scrub_lock);
1540 if (fs_info->scrub_workers_refcnt == 0) { 1541 if (fs_info->scrub_workers_refcnt == 0) {
1541 btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1542 btrfs_init_workers(&fs_info->scrub_workers, "scrub",
1542 fs_info->thread_pool_size, &fs_info->generic_worker); 1543 fs_info->thread_pool_size, &fs_info->generic_worker);
1543 fs_info->scrub_workers.idle_thresh = 4; 1544 fs_info->scrub_workers.idle_thresh = 4;
1544 btrfs_start_workers(&fs_info->scrub_workers, 1); 1545 ret = btrfs_start_workers(&fs_info->scrub_workers);
1546 if (ret)
1547 goto out;
1545 } 1548 }
1546 ++fs_info->scrub_workers_refcnt; 1549 ++fs_info->scrub_workers_refcnt;
1550out:
1547 mutex_unlock(&fs_info->scrub_lock); 1551 mutex_unlock(&fs_info->scrub_lock);
1548 1552
1549 return 0; 1553 return ret;
1550} 1554}
1551 1555
1552static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) 1556static noinline_for_stack void scrub_workers_put(struct btrfs_root *root)
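
scrub_workers_get() now forwards the btrfs_start_workers() error while still releasing scrub_lock on every exit, and only bumps the refcount once the workers are known to be running. A small sketch of that lock-protected lazy-start-with-refcount shape, using invented names and a pthread mutex in place of the kernel mutex, follows.

#include <errno.h>
#include <pthread.h>

struct lazy_pool {
	pthread_mutex_t lock;
	int refcnt;
	int started;
};

static int start_backend(struct lazy_pool *p)
{
	p->started = 1;		/* pretend this may instead fail with -ENOMEM */
	return 0;
}

int lazy_pool_get(struct lazy_pool *p)
{
	int ret = 0;

	pthread_mutex_lock(&p->lock);
	if (p->refcnt == 0) {
		ret = start_backend(p);
		if (ret)
			goto out;	/* do not bump the refcount on failure */
	}
	p->refcnt++;
out:
	pthread_mutex_unlock(&p->lock);
	return ret;
}
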
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e28ad4baf483..ae488aa1966a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -40,7 +40,7 @@
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include <linux/mnt_namespace.h> 43#include <linux/ratelimit.h>
44#include "compat.h" 44#include "compat.h"
45#include "delayed-inode.h" 45#include "delayed-inode.h"
46#include "ctree.h" 46#include "ctree.h"
@@ -661,9 +661,9 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
661 return ret; 661 return ret;
662} 662}
663 663
664static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 664static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
665{ 665{
666 struct btrfs_root *root = btrfs_sb(vfs->mnt_sb); 666 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
667 struct btrfs_fs_info *info = root->fs_info; 667 struct btrfs_fs_info *info = root->fs_info;
668 char *compress_type; 668 char *compress_type;
669 669
@@ -1053,7 +1053,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1053 u64 avail_space; 1053 u64 avail_space;
1054 u64 used_space; 1054 u64 used_space;
1055 u64 min_stripe_size; 1055 u64 min_stripe_size;
1056 int min_stripes = 1; 1056 int min_stripes = 1, num_stripes = 1;
1057 int i = 0, nr_devices; 1057 int i = 0, nr_devices;
1058 int ret; 1058 int ret;
1059 1059
@@ -1067,12 +1067,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1067 1067
1068 /* calc min stripe number for data space alloction */ 1068 /* calc min stripe number for data space alloction */
1069 type = btrfs_get_alloc_profile(root, 1); 1069 type = btrfs_get_alloc_profile(root, 1);
1070 if (type & BTRFS_BLOCK_GROUP_RAID0) 1070 if (type & BTRFS_BLOCK_GROUP_RAID0) {
1071 min_stripes = 2; 1071 min_stripes = 2;
1072 else if (type & BTRFS_BLOCK_GROUP_RAID1) 1072 num_stripes = nr_devices;
1073 } else if (type & BTRFS_BLOCK_GROUP_RAID1) {
1073 min_stripes = 2; 1074 min_stripes = 2;
1074 else if (type & BTRFS_BLOCK_GROUP_RAID10) 1075 num_stripes = 2;
1076 } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
1075 min_stripes = 4; 1077 min_stripes = 4;
1078 num_stripes = 4;
1079 }
1076 1080
1077 if (type & BTRFS_BLOCK_GROUP_DUP) 1081 if (type & BTRFS_BLOCK_GROUP_DUP)
1078 min_stripe_size = 2 * BTRFS_STRIPE_LEN; 1082 min_stripe_size = 2 * BTRFS_STRIPE_LEN;
@@ -1141,13 +1145,16 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1141 i = nr_devices - 1; 1145 i = nr_devices - 1;
1142 avail_space = 0; 1146 avail_space = 0;
1143 while (nr_devices >= min_stripes) { 1147 while (nr_devices >= min_stripes) {
1148 if (num_stripes > nr_devices)
1149 num_stripes = nr_devices;
1150
1144 if (devices_info[i].max_avail >= min_stripe_size) { 1151 if (devices_info[i].max_avail >= min_stripe_size) {
1145 int j; 1152 int j;
1146 u64 alloc_size; 1153 u64 alloc_size;
1147 1154
1148 avail_space += devices_info[i].max_avail * min_stripes; 1155 avail_space += devices_info[i].max_avail * num_stripes;
1149 alloc_size = devices_info[i].max_avail; 1156 alloc_size = devices_info[i].max_avail;
1150 for (j = i + 1 - min_stripes; j <= i; j++) 1157 for (j = i + 1 - num_stripes; j <= i; j++)
1151 devices_info[j].max_avail -= alloc_size; 1158 devices_info[j].max_avail -= alloc_size;
1152 } 1159 }
1153 i--; 1160 i--;
@@ -1264,6 +1271,16 @@ static int btrfs_unfreeze(struct super_block *sb)
1264 return 0; 1271 return 0;
1265} 1272}
1266 1273
1274static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
1275{
1276 int ret;
1277
1278 ret = btrfs_dirty_inode(inode);
1279 if (ret)
1280 printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
1281 "error %d\n", btrfs_ino(inode), ret);
1282}
1283
1267static const struct super_operations btrfs_super_ops = { 1284static const struct super_operations btrfs_super_ops = {
1268 .drop_inode = btrfs_drop_inode, 1285 .drop_inode = btrfs_drop_inode,
1269 .evict_inode = btrfs_evict_inode, 1286 .evict_inode = btrfs_evict_inode,
@@ -1271,7 +1288,7 @@ static const struct super_operations btrfs_super_ops = {
1271 .sync_fs = btrfs_sync_fs, 1288 .sync_fs = btrfs_sync_fs,
1272 .show_options = btrfs_show_options, 1289 .show_options = btrfs_show_options,
1273 .write_inode = btrfs_write_inode, 1290 .write_inode = btrfs_write_inode,
1274 .dirty_inode = btrfs_dirty_inode, 1291 .dirty_inode = btrfs_fs_dirty_inode,
1275 .alloc_inode = btrfs_alloc_inode, 1292 .alloc_inode = btrfs_alloc_inode,
1276 .destroy_inode = btrfs_destroy_inode, 1293 .destroy_inode = btrfs_destroy_inode,
1277 .statfs = btrfs_statfs, 1294 .statfs = btrfs_statfs,
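
Since ->dirty_inode in super_operations returns void, the error-returning btrfs_dirty_inode() is wrapped by btrfs_fs_dirty_inode(), which can only log the failure. The adapter shape, a void callback slot fronting a fallible helper, is sketched below with made-up names; only the structure is meant to match.

#include <errno.h>
#include <stdio.h>

struct op_table {
	void (*dirty)(unsigned long ino);
};

static int do_dirty(unsigned long ino)
{
	return (ino == 0) ? -ENOSPC : 0;	/* pretend inode 0 has no space */
}

static void dirty_cb(unsigned long ino)
{
	int ret = do_dirty(ino);

	if (ret)
		fprintf(stderr, "fail to dirty inode %lu error %d\n", ino, ret);
}

static const struct op_table ops = { .dirty = dirty_cb };

int main(void)
{
	ops.dirty(0);
	ops.dirty(42);
	return 0;
}
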
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0a8c8f8304b1..f4b839fd3c9d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -295,6 +295,12 @@ loop_lock:
295 btrfs_requeue_work(&device->work); 295 btrfs_requeue_work(&device->work);
296 goto done; 296 goto done;
297 } 297 }
298 /* unplug every 64 requests just for good measure */
299 if (batch_run % 64 == 0) {
300 blk_finish_plug(&plug);
301 blk_start_plug(&plug);
302 sync_pending = 0;
303 }
298 } 304 }
299 305
300 cond_resched(); 306 cond_resched();
@@ -3258,7 +3264,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
3258 */ 3264 */
3259 if (atomic_read(&bbio->error) > bbio->max_errors) { 3265 if (atomic_read(&bbio->error) > bbio->max_errors) {
3260 err = -EIO; 3266 err = -EIO;
3261 } else if (err) { 3267 } else {
3262 /* 3268 /*
3263 * this bio is actually up to date, we didn't 3269 * this bio is actually up to date, we didn't
3264 * go over the max number of errors 3270 * go over the max number of errors
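
The submit loop in volumes.c now restarts the block-layer plug every 64 requests so queued bios are released periodically instead of only when the whole batch ends. A generic flush-every-N sketch of that batching behaviour follows; flush_batch() is only a stand-in for the blk_finish_plug()/blk_start_plug() pair, and the numbers are illustrative.

#include <stdio.h>

#define FLUSH_EVERY 64

static void flush_batch(int *pending)
{
	/* stand-in for finishing and restarting the plug */
	printf("flushing %d queued items\n", *pending);
	*pending = 0;
}

void submit_items(int total)
{
	int pending = 0;
	int batch_run;

	for (batch_run = 1; batch_run <= total; batch_run++) {
		pending++;			/* queue one item */
		if (batch_run % FLUSH_EVERY == 0)
			flush_batch(&pending);	/* periodic release */
	}
	if (pending)
		flush_batch(&pending);		/* final drain */
}

int main(void)
{
	submit_items(150);
	return 0;
}
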
diff --git a/fs/buffer.c b/fs/buffer.c
index 19d8eb7fdc81..1a30db77af32 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,7 +41,6 @@
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/mpage.h> 42#include <linux/mpage.h>
43#include <linux/bit_spinlock.h> 43#include <linux/bit_spinlock.h>
44#include <linux/cleancache.h>
45 44
46static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); 45static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
47 46
@@ -231,55 +230,6 @@ out:
231 return ret; 230 return ret;
232} 231}
233 232
234/* If invalidate_buffers() will trash dirty buffers, it means some kind
235 of fs corruption is going on. Trashing dirty data always imply losing
236 information that was supposed to be just stored on the physical layer
237 by the user.
238
239 Thus invalidate_buffers in general usage is not allwowed to trash
240 dirty buffers. For example ioctl(FLSBLKBUF) expects dirty data to
241 be preserved. These buffers are simply skipped.
242
243 We also skip buffers which are still in use. For example this can
244 happen if a userspace program is reading the block device.
245
246 NOTE: In the case where the user removed a removable-media-disk even if
247 there's still dirty data not synced on disk (due a bug in the device driver
248 or due an error of the user), by not destroying the dirty buffers we could
249 generate corruption also on the next media inserted, thus a parameter is
250 necessary to handle this case in the most safe way possible (trying
251 to not corrupt also the new disk inserted with the data belonging to
252 the old now corrupted disk). Also for the ramdisk the natural thing
253 to do in order to release the ramdisk memory is to destroy dirty buffers.
254
255 These are two special cases. Normal usage imply the device driver
256 to issue a sync on the device (without waiting I/O completion) and
257 then an invalidate_buffers call that doesn't trash dirty buffers.
258
259 For handling cache coherency with the blkdev pagecache the 'update' case
260 is been introduced. It is needed to re-read from disk any pinned
261 buffer. NOTE: re-reading from disk is destructive so we can do it only
262 when we assume nobody is changing the buffercache under our I/O and when
263 we think the disk contains more recent information than the buffercache.
264 The update == 1 pass marks the buffers we need to update, the update == 2
265 pass does the actual I/O. */
266void invalidate_bdev(struct block_device *bdev)
267{
268 struct address_space *mapping = bdev->bd_inode->i_mapping;
269
270 if (mapping->nrpages == 0)
271 return;
272
273 invalidate_bh_lrus();
274 lru_add_drain_all(); /* make sure all lru add caches are flushed */
275 invalidate_mapping_pages(mapping, 0, -1);
276 /* 99% of the time, we don't need to flush the cleancache on the bdev.
277 * But, for the strange corners, lets be cautious
278 */
279 cleancache_flush_inode(mapping);
280}
281EXPORT_SYMBOL(invalidate_bdev);
282
283/* 233/*
284 * Kick the writeback threads then try to free up some ZONE_NORMAL memory. 234 * Kick the writeback threads then try to free up some ZONE_NORMAL memory.
285 */ 235 */
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1064805e653b..67bef6d01484 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -11,7 +11,6 @@
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/mount.h> 13#include <linux/mount.h>
14#include <linux/buffer_head.h>
15#include "internal.h" 14#include "internal.h"
16 15
17#define list_to_page(head) (list_entry((head)->prev, struct page, lru)) 16#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 4144caf2f9d3..173b1d22e59b 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); 87 snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
88 88
89 /* dirty the head */ 89 /* dirty the head */
90 spin_lock(&inode->i_lock); 90 spin_lock(&ci->i_ceph_lock);
91 if (ci->i_head_snapc == NULL) 91 if (ci->i_head_snapc == NULL)
92 ci->i_head_snapc = ceph_get_snap_context(snapc); 92 ci->i_head_snapc = ceph_get_snap_context(snapc);
93 ++ci->i_wrbuffer_ref_head; 93 ++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1, 100 ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head, 101 ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
102 snapc, snapc->seq, snapc->num_snaps); 102 snapc, snapc->seq, snapc->num_snaps);
103 spin_unlock(&inode->i_lock); 103 spin_unlock(&ci->i_ceph_lock);
104 104
105 /* now adjust page */ 105 /* now adjust page */
106 spin_lock_irq(&mapping->tree_lock); 106 spin_lock_irq(&mapping->tree_lock);
@@ -391,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
391 struct ceph_snap_context *snapc = NULL; 391 struct ceph_snap_context *snapc = NULL;
392 struct ceph_cap_snap *capsnap = NULL; 392 struct ceph_cap_snap *capsnap = NULL;
393 393
394 spin_lock(&inode->i_lock); 394 spin_lock(&ci->i_ceph_lock);
395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 395 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap, 396 dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
397 capsnap->context, capsnap->dirty_pages); 397 capsnap->context, capsnap->dirty_pages);
@@ -407,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
407 dout(" head snapc %p has %d dirty pages\n", 407 dout(" head snapc %p has %d dirty pages\n",
408 snapc, ci->i_wrbuffer_ref_head); 408 snapc, ci->i_wrbuffer_ref_head);
409 } 409 }
410 spin_unlock(&inode->i_lock); 410 spin_unlock(&ci->i_ceph_lock);
411 return snapc; 411 return snapc;
412} 412}
413 413
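
The ceph hunks here and below replace uses of the VFS inode->i_lock with a spinlock kept in ceph's own per-inode structure, i_ceph_lock, so ceph-private fields are guarded by a lock the filesystem owns. A minimal, hypothetical sketch of embedding private state plus its lock in a containing structure and reaching it from the generic object follows; the types and field names are invented and a pthread mutex stands in for the spinlock.

#include <pthread.h>
#include <stddef.h>

struct generic_inode {
	unsigned long ino;
};

struct fs_inode_info {
	pthread_mutex_t ceph_lock;	/* protects the private fields below */
	int wrbuffer_ref;
	struct generic_inode vfs_inode;
};

/* container_of-style accessor from the generic inode to the private one */
static struct fs_inode_info *FS_I(struct generic_inode *inode)
{
	return (struct fs_inode_info *)((char *)inode -
					offsetof(struct fs_inode_info, vfs_inode));
}

void take_wrbuffer_ref(struct generic_inode *inode)
{
	struct fs_inode_info *ci = FS_I(inode);

	pthread_mutex_lock(&ci->ceph_lock);
	ci->wrbuffer_ref++;
	pthread_mutex_unlock(&ci->ceph_lock);
}
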
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 0f327c6c9679..b60fc8bfb3e9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
309/* 309/*
310 * Find ceph_cap for given mds, if any. 310 * Find ceph_cap for given mds, if any.
311 * 311 *
312 * Called with i_lock held. 312 * Called with i_ceph_lock held.
313 */ 313 */
314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds) 314static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
315{ 315{
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
332{ 332{
333 struct ceph_cap *cap; 333 struct ceph_cap *cap;
334 334
335 spin_lock(&ci->vfs_inode.i_lock); 335 spin_lock(&ci->i_ceph_lock);
336 cap = __get_cap_for_mds(ci, mds); 336 cap = __get_cap_for_mds(ci, mds);
337 spin_unlock(&ci->vfs_inode.i_lock); 337 spin_unlock(&ci->i_ceph_lock);
338 return cap; 338 return cap;
339} 339}
340 340
@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
361 361
362int ceph_get_cap_mds(struct inode *inode) 362int ceph_get_cap_mds(struct inode *inode)
363{ 363{
364 struct ceph_inode_info *ci = ceph_inode(inode);
364 int mds; 365 int mds;
365 spin_lock(&inode->i_lock); 366 spin_lock(&ci->i_ceph_lock);
366 mds = __ceph_get_cap_mds(ceph_inode(inode)); 367 mds = __ceph_get_cap_mds(ceph_inode(inode));
367 spin_unlock(&inode->i_lock); 368 spin_unlock(&ci->i_ceph_lock);
368 return mds; 369 return mds;
369} 370}
370 371
371/* 372/*
372 * Called under i_lock. 373 * Called under i_ceph_lock.
373 */ 374 */
374static void __insert_cap_node(struct ceph_inode_info *ci, 375static void __insert_cap_node(struct ceph_inode_info *ci,
375 struct ceph_cap *new) 376 struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
415 * 416 *
416 * If I_FLUSH is set, leave the inode at the front of the list. 417 * If I_FLUSH is set, leave the inode at the front of the list.
417 * 418 *
418 * Caller holds i_lock 419 * Caller holds i_ceph_lock
419 * -> we take mdsc->cap_delay_lock 420 * -> we take mdsc->cap_delay_lock
420 */ 421 */
421static void __cap_delay_requeue(struct ceph_mds_client *mdsc, 422static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
457/* 458/*
458 * Cancel delayed work on cap. 459 * Cancel delayed work on cap.
459 * 460 *
460 * Caller must hold i_lock. 461 * Caller must hold i_ceph_lock.
461 */ 462 */
462static void __cap_delay_cancel(struct ceph_mds_client *mdsc, 463static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
463 struct ceph_inode_info *ci) 464 struct ceph_inode_info *ci)
@@ -532,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
532 wanted |= ceph_caps_for_mode(fmode); 533 wanted |= ceph_caps_for_mode(fmode);
533 534
534retry: 535retry:
535 spin_lock(&inode->i_lock); 536 spin_lock(&ci->i_ceph_lock);
536 cap = __get_cap_for_mds(ci, mds); 537 cap = __get_cap_for_mds(ci, mds);
537 if (!cap) { 538 if (!cap) {
538 if (new_cap) { 539 if (new_cap) {
539 cap = new_cap; 540 cap = new_cap;
540 new_cap = NULL; 541 new_cap = NULL;
541 } else { 542 } else {
542 spin_unlock(&inode->i_lock); 543 spin_unlock(&ci->i_ceph_lock);
543 new_cap = get_cap(mdsc, caps_reservation); 544 new_cap = get_cap(mdsc, caps_reservation);
544 if (new_cap == NULL) 545 if (new_cap == NULL)
545 return -ENOMEM; 546 return -ENOMEM;
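
The ceph_add_cap() hunk above keeps its retry label because the cap allocation can sleep, which is why the spinlock is dropped before calling get_cap() and the lookup is redone once the lock is retaken. A sketch of that drop-allocate-retry shape, with made-up names and a plain kzalloc() standing in for the reservation-based allocator:

        #include <linux/errno.h>
        #include <linux/slab.h>
        #include <linux/spinlock.h>

        struct sketch_obj { int placeholder; };

        struct sketch_table {
                spinlock_t lock;
                struct sketch_obj *slot;        /* installed at most once */
        };

        static int sketch_install(struct sketch_table *t)
        {
                struct sketch_obj *new = NULL;

        retry:
                spin_lock(&t->lock);
                if (!t->slot) {
                        if (new) {
                                t->slot = new;  /* use the allocation from the last pass */
                                new = NULL;
                        } else {
                                /* cannot sleep under a spinlock: drop it, allocate, retry */
                                spin_unlock(&t->lock);
                                new = kzalloc(sizeof(*new), GFP_KERNEL);
                                if (!new)
                                        return -ENOMEM;
                                goto retry;
                        }
                }
                spin_unlock(&t->lock);
                kfree(new);     /* NULL, or unused because someone else won the race */
                return 0;
        }
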
@@ -625,7 +626,7 @@ retry:
625 626
626 if (fmode >= 0) 627 if (fmode >= 0)
627 __ceph_get_fmode(ci, fmode); 628 __ceph_get_fmode(ci, fmode);
628 spin_unlock(&inode->i_lock); 629 spin_unlock(&ci->i_ceph_lock);
629 wake_up_all(&ci->i_cap_wq); 630 wake_up_all(&ci->i_cap_wq);
630 return 0; 631 return 0;
631} 632}
@@ -792,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
792 struct rb_node *p; 793 struct rb_node *p;
793 int ret = 0; 794 int ret = 0;
794 795
795 spin_lock(&inode->i_lock); 796 spin_lock(&ci->i_ceph_lock);
796 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 797 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
797 cap = rb_entry(p, struct ceph_cap, ci_node); 798 cap = rb_entry(p, struct ceph_cap, ci_node);
798 if (__cap_is_valid(cap) && 799 if (__cap_is_valid(cap) &&
@@ -801,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
801 break; 802 break;
802 } 803 }
803 } 804 }
804 spin_unlock(&inode->i_lock); 805 spin_unlock(&ci->i_ceph_lock);
805 dout("ceph_caps_revoking %p %s = %d\n", inode, 806 dout("ceph_caps_revoking %p %s = %d\n", inode,
806 ceph_cap_string(mask), ret); 807 ceph_cap_string(mask), ret);
807 return ret; 808 return ret;
@@ -855,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
855} 856}
856 857
857/* 858/*
858 * called under i_lock 859 * called under i_ceph_lock
859 */ 860 */
860static int __ceph_is_any_caps(struct ceph_inode_info *ci) 861static int __ceph_is_any_caps(struct ceph_inode_info *ci)
861{ 862{
@@ -865,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
865/* 866/*
866 * Remove a cap. Take steps to deal with a racing iterate_session_caps. 867 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
867 * 868 *
868 * caller should hold i_lock. 869 * caller should hold i_ceph_lock.
869 * caller will not hold session s_mutex if called from destroy_inode. 870 * caller will not hold session s_mutex if called from destroy_inode.
870 */ 871 */
871void __ceph_remove_cap(struct ceph_cap *cap) 872void __ceph_remove_cap(struct ceph_cap *cap)
@@ -927,7 +928,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
927 u64 size, u64 max_size, 928 u64 size, u64 max_size,
928 struct timespec *mtime, struct timespec *atime, 929 struct timespec *mtime, struct timespec *atime,
929 u64 time_warp_seq, 930 u64 time_warp_seq,
930 uid_t uid, gid_t gid, mode_t mode, 931 uid_t uid, gid_t gid, umode_t mode,
931 u64 xattr_version, 932 u64 xattr_version,
932 struct ceph_buffer *xattrs_buf, 933 struct ceph_buffer *xattrs_buf,
933 u64 follows) 934 u64 follows)
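
Besides the lock rename, the prototype above switches the mode argument from mode_t to umode_t, the kernel-internal type used for i_mode (a 16-bit unsigned value); the dout() format strings in fs/ceph/dir.c change to 0%ho further down for the same reason. A tiny sketch of the type in use, with an invented helper:

        #include <linux/printk.h>
        #include <linux/stat.h>
        #include <linux/types.h>

        /* umode_t is an unsigned short, so %ho is the matching printk format */
        static void sketch_show_mode(umode_t mode)
        {
                pr_debug("mode 0%ho%s\n", mode, S_ISDIR(mode) ? " (dir)" : "");
        }
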
@@ -1028,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
1028 1029
1029/* 1030/*
1030 * Queue cap releases when an inode is dropped from our cache. Since 1031 * Queue cap releases when an inode is dropped from our cache. Since
1031 * inode is about to be destroyed, there is no need for i_lock. 1032 * inode is about to be destroyed, there is no need for i_ceph_lock.
1032 */ 1033 */
1033void ceph_queue_caps_release(struct inode *inode) 1034void ceph_queue_caps_release(struct inode *inode)
1034{ 1035{
@@ -1049,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)
1049 1050
1050/* 1051/*
1051 * Send a cap msg on the given inode. Update our caps state, then 1052 * Send a cap msg on the given inode. Update our caps state, then
1052 * drop i_lock and send the message. 1053 * drop i_ceph_lock and send the message.
1053 * 1054 *
1054 * Make note of max_size reported/requested from mds, revoked caps 1055 * Make note of max_size reported/requested from mds, revoked caps
1055 * that have now been implemented. 1056 * that have now been implemented.
@@ -1061,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
1061 * Return non-zero if delayed release, or we experienced an error 1062 * Return non-zero if delayed release, or we experienced an error
1062 * such that the caller should requeue + retry later. 1063 * such that the caller should requeue + retry later.
1063 * 1064 *
1064 * called with i_lock, then drops it. 1065 * called with i_ceph_lock, then drops it.
1065 * caller should hold snap_rwsem (read), s_mutex. 1066 * caller should hold snap_rwsem (read), s_mutex.
1066 */ 1067 */
1067static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, 1068static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1068 int op, int used, int want, int retain, int flushing, 1069 int op, int used, int want, int retain, int flushing,
1069 unsigned *pflush_tid) 1070 unsigned *pflush_tid)
1070 __releases(cap->ci->vfs_inode->i_lock) 1071 __releases(cap->ci->i_ceph_lock)
1071{ 1072{
1072 struct ceph_inode_info *ci = cap->ci; 1073 struct ceph_inode_info *ci = cap->ci;
1073 struct inode *inode = &ci->vfs_inode; 1074 struct inode *inode = &ci->vfs_inode;
@@ -1077,7 +1078,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1077 u64 size, max_size; 1078 u64 size, max_size;
1078 struct timespec mtime, atime; 1079 struct timespec mtime, atime;
1079 int wake = 0; 1080 int wake = 0;
1080 mode_t mode; 1081 umode_t mode;
1081 uid_t uid; 1082 uid_t uid;
1082 gid_t gid; 1083 gid_t gid;
1083 struct ceph_mds_session *session; 1084 struct ceph_mds_session *session;
@@ -1170,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1170 xattr_version = ci->i_xattrs.version; 1171 xattr_version = ci->i_xattrs.version;
1171 } 1172 }
1172 1173
1173 spin_unlock(&inode->i_lock); 1174 spin_unlock(&ci->i_ceph_lock);
1174 1175
1175 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id, 1176 ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
1176 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq, 1177 op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1198,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
1198 * Unless @again is true, skip cap_snaps that were already sent to 1199 * Unless @again is true, skip cap_snaps that were already sent to
1199 * the MDS (i.e., during this session). 1200 * the MDS (i.e., during this session).
1200 * 1201 *
1201 * Called under i_lock. Takes s_mutex as needed. 1202 * Called under i_ceph_lock. Takes s_mutex as needed.
1202 */ 1203 */
1203void __ceph_flush_snaps(struct ceph_inode_info *ci, 1204void __ceph_flush_snaps(struct ceph_inode_info *ci,
1204 struct ceph_mds_session **psession, 1205 struct ceph_mds_session **psession,
1205 int again) 1206 int again)
1206 __releases(ci->vfs_inode->i_lock) 1207 __releases(ci->i_ceph_lock)
1207 __acquires(ci->vfs_inode->i_lock) 1208 __acquires(ci->i_ceph_lock)
1208{ 1209{
1209 struct inode *inode = &ci->vfs_inode; 1210 struct inode *inode = &ci->vfs_inode;
1210 int mds; 1211 int mds;
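
The __releases()/__acquires() lines above are sparse annotations rather than executable code: they record that __ceph_flush_snaps() temporarily drops i_ceph_lock and holds it again on return, and after the rename they can name the lock directly instead of reaching through vfs_inode to the VFS lock. A minimal sketch of annotating a lock-dropping helper, with an invented function:

        #include <linux/compiler.h>
        #include <linux/spinlock.h>

        /* drops and retakes the caller's lock; sparse checks the annotations */
        static void sketch_drop_and_retake(spinlock_t *lock)
                __releases(lock)
                __acquires(lock)
        {
                spin_unlock(lock);
                /* ... work that must not run under the spinlock ... */
                spin_lock(lock);
        }
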
@@ -1261,7 +1262,7 @@ retry:
1261 session = NULL; 1262 session = NULL;
1262 } 1263 }
1263 if (!session) { 1264 if (!session) {
1264 spin_unlock(&inode->i_lock); 1265 spin_unlock(&ci->i_ceph_lock);
1265 mutex_lock(&mdsc->mutex); 1266 mutex_lock(&mdsc->mutex);
1266 session = __ceph_lookup_mds_session(mdsc, mds); 1267 session = __ceph_lookup_mds_session(mdsc, mds);
1267 mutex_unlock(&mdsc->mutex); 1268 mutex_unlock(&mdsc->mutex);
@@ -1275,7 +1276,7 @@ retry:
1275 * deletion or migration. retry, and we'll 1276 * deletion or migration. retry, and we'll
1276 * get a better @mds value next time. 1277 * get a better @mds value next time.
1277 */ 1278 */
1278 spin_lock(&inode->i_lock); 1279 spin_lock(&ci->i_ceph_lock);
1279 goto retry; 1280 goto retry;
1280 } 1281 }
1281 1282
@@ -1285,7 +1286,7 @@ retry:
1285 list_del_init(&capsnap->flushing_item); 1286 list_del_init(&capsnap->flushing_item);
1286 list_add_tail(&capsnap->flushing_item, 1287 list_add_tail(&capsnap->flushing_item,
1287 &session->s_cap_snaps_flushing); 1288 &session->s_cap_snaps_flushing);
1288 spin_unlock(&inode->i_lock); 1289 spin_unlock(&ci->i_ceph_lock);
1289 1290
1290 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", 1291 dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
1291 inode, capsnap, capsnap->follows, capsnap->flush_tid); 1292 inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1302,7 +1303,7 @@ retry:
1302 next_follows = capsnap->follows + 1; 1303 next_follows = capsnap->follows + 1;
1303 ceph_put_cap_snap(capsnap); 1304 ceph_put_cap_snap(capsnap);
1304 1305
1305 spin_lock(&inode->i_lock); 1306 spin_lock(&ci->i_ceph_lock);
1306 goto retry; 1307 goto retry;
1307 } 1308 }
1308 1309
@@ -1322,11 +1323,9 @@ out:
1322 1323
1323static void ceph_flush_snaps(struct ceph_inode_info *ci) 1324static void ceph_flush_snaps(struct ceph_inode_info *ci)
1324{ 1325{
1325 struct inode *inode = &ci->vfs_inode; 1326 spin_lock(&ci->i_ceph_lock);
1326
1327 spin_lock(&inode->i_lock);
1328 __ceph_flush_snaps(ci, NULL, 0); 1327 __ceph_flush_snaps(ci, NULL, 0);
1329 spin_unlock(&inode->i_lock); 1328 spin_unlock(&ci->i_ceph_lock);
1330} 1329}
1331 1330
1332/* 1331/*
@@ -1373,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
1373 * Add dirty inode to the flushing list. Assigned a seq number so we 1372 * Add dirty inode to the flushing list. Assigned a seq number so we
1374 * can wait for caps to flush without starving. 1373 * can wait for caps to flush without starving.
1375 * 1374 *
1376 * Called under i_lock. 1375 * Called under i_ceph_lock.
1377 */ 1376 */
1378static int __mark_caps_flushing(struct inode *inode, 1377static int __mark_caps_flushing(struct inode *inode,
1379 struct ceph_mds_session *session) 1378 struct ceph_mds_session *session)
@@ -1421,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
1421 struct ceph_inode_info *ci = ceph_inode(inode); 1420 struct ceph_inode_info *ci = ceph_inode(inode);
1422 u32 invalidating_gen = ci->i_rdcache_gen; 1421 u32 invalidating_gen = ci->i_rdcache_gen;
1423 1422
1424 spin_unlock(&inode->i_lock); 1423 spin_unlock(&ci->i_ceph_lock);
1425 invalidate_mapping_pages(&inode->i_data, 0, -1); 1424 invalidate_mapping_pages(&inode->i_data, 0, -1);
1426 spin_lock(&inode->i_lock); 1425 spin_lock(&ci->i_ceph_lock);
1427 1426
1428 if (inode->i_data.nrpages == 0 && 1427 if (inode->i_data.nrpages == 0 &&
1429 invalidating_gen == ci->i_rdcache_gen) { 1428 invalidating_gen == ci->i_rdcache_gen) {
@@ -1470,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1470 if (mdsc->stopping) 1469 if (mdsc->stopping)
1471 is_delayed = 1; 1470 is_delayed = 1;
1472 1471
1473 spin_lock(&inode->i_lock); 1472 spin_lock(&ci->i_ceph_lock);
1474 1473
1475 if (ci->i_ceph_flags & CEPH_I_FLUSH) 1474 if (ci->i_ceph_flags & CEPH_I_FLUSH)
1476 flags |= CHECK_CAPS_FLUSH; 1475 flags |= CHECK_CAPS_FLUSH;
@@ -1480,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
1480 __ceph_flush_snaps(ci, &session, 0); 1479 __ceph_flush_snaps(ci, &session, 0);
1481 goto retry_locked; 1480 goto retry_locked;
1482retry: 1481retry:
1483 spin_lock(&inode->i_lock); 1482 spin_lock(&ci->i_ceph_lock);
1484retry_locked: 1483retry_locked:
1485 file_wanted = __ceph_caps_file_wanted(ci); 1484 file_wanted = __ceph_caps_file_wanted(ci);
1486 used = __ceph_caps_used(ci); 1485 used = __ceph_caps_used(ci);
@@ -1634,7 +1633,7 @@ ack:
1634 if (mutex_trylock(&session->s_mutex) == 0) { 1633 if (mutex_trylock(&session->s_mutex) == 0) {
1635 dout("inverting session/ino locks on %p\n", 1634 dout("inverting session/ino locks on %p\n",
1636 session); 1635 session);
1637 spin_unlock(&inode->i_lock); 1636 spin_unlock(&ci->i_ceph_lock);
1638 if (took_snap_rwsem) { 1637 if (took_snap_rwsem) {
1639 up_read(&mdsc->snap_rwsem); 1638 up_read(&mdsc->snap_rwsem);
1640 took_snap_rwsem = 0; 1639 took_snap_rwsem = 0;
@@ -1648,7 +1647,7 @@ ack:
1648 if (down_read_trylock(&mdsc->snap_rwsem) == 0) { 1647 if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
1649 dout("inverting snap/in locks on %p\n", 1648 dout("inverting snap/in locks on %p\n",
1650 inode); 1649 inode);
1651 spin_unlock(&inode->i_lock); 1650 spin_unlock(&ci->i_ceph_lock);
1652 down_read(&mdsc->snap_rwsem); 1651 down_read(&mdsc->snap_rwsem);
1653 took_snap_rwsem = 1; 1652 took_snap_rwsem = 1;
1654 goto retry; 1653 goto retry;
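
The two hunks above are the lock-ordering dance in ceph_check_caps(): s_mutex and snap_rwsem are sleeping locks, so they cannot be acquired the blocking way while i_ceph_lock is held. When a trylock fails, the function drops the spinlock, takes the sleeping lock, and restarts from its retry label so the state it examined is revalidated. A sketch of that pattern with generic locks and invented names:

        #include <linux/mutex.h>
        #include <linux/spinlock.h>
        #include <linux/types.h>

        struct sketch_ctx {
                struct mutex big_lock;          /* sleeping lock, taken first when blocking */
                spinlock_t little_lock;         /* spinlock, inner */
        };

        static void sketch_do_work(struct sketch_ctx *c)
        {
                bool have_big = false;

        retry:
                spin_lock(&c->little_lock);
                if (!have_big) {
                        if (!mutex_trylock(&c->big_lock)) {
                                /* blocking here would invert the order: back off,
                                 * take the mutex first, then revalidate from the top */
                                spin_unlock(&c->little_lock);
                                mutex_lock(&c->big_lock);
                                have_big = true;
                                goto retry;
                        }
                        have_big = true;
                }
                /* ... both locks held safely, state revalidated ... */
                spin_unlock(&c->little_lock);
                mutex_unlock(&c->big_lock);
        }
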
@@ -1664,10 +1663,10 @@ ack:
1664 mds = cap->mds; /* remember mds, so we don't repeat */ 1663 mds = cap->mds; /* remember mds, so we don't repeat */
1665 sent++; 1664 sent++;
1666 1665
1667 /* __send_cap drops i_lock */ 1666 /* __send_cap drops i_ceph_lock */
1668 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, 1667 delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
1669 retain, flushing, NULL); 1668 retain, flushing, NULL);
1670 goto retry; /* retake i_lock and restart our cap scan. */ 1669 goto retry; /* retake i_ceph_lock and restart our cap scan. */
1671 } 1670 }
1672 1671
1673 /* 1672 /*
@@ -1681,7 +1680,7 @@ ack:
1681 else if (!is_delayed || force_requeue) 1680 else if (!is_delayed || force_requeue)
1682 __cap_delay_requeue(mdsc, ci); 1681 __cap_delay_requeue(mdsc, ci);
1683 1682
1684 spin_unlock(&inode->i_lock); 1683 spin_unlock(&ci->i_ceph_lock);
1685 1684
1686 if (queue_invalidate) 1685 if (queue_invalidate)
1687 ceph_queue_invalidate(inode); 1686 ceph_queue_invalidate(inode);
@@ -1704,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
1704 int flushing = 0; 1703 int flushing = 0;
1705 1704
1706retry: 1705retry:
1707 spin_lock(&inode->i_lock); 1706 spin_lock(&ci->i_ceph_lock);
1708 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { 1707 if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
1709 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); 1708 dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
1710 goto out; 1709 goto out;
@@ -1716,7 +1715,7 @@ retry:
1716 int delayed; 1715 int delayed;
1717 1716
1718 if (!session) { 1717 if (!session) {
1719 spin_unlock(&inode->i_lock); 1718 spin_unlock(&ci->i_ceph_lock);
1720 session = cap->session; 1719 session = cap->session;
1721 mutex_lock(&session->s_mutex); 1720 mutex_lock(&session->s_mutex);
1722 goto retry; 1721 goto retry;
@@ -1727,18 +1726,18 @@ retry:
1727 1726
1728 flushing = __mark_caps_flushing(inode, session); 1727 flushing = __mark_caps_flushing(inode, session);
1729 1728
1730 /* __send_cap drops i_lock */ 1729 /* __send_cap drops i_ceph_lock */
1731 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want, 1730 delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
1732 cap->issued | cap->implemented, flushing, 1731 cap->issued | cap->implemented, flushing,
1733 flush_tid); 1732 flush_tid);
1734 if (!delayed) 1733 if (!delayed)
1735 goto out_unlocked; 1734 goto out_unlocked;
1736 1735
1737 spin_lock(&inode->i_lock); 1736 spin_lock(&ci->i_ceph_lock);
1738 __cap_delay_requeue(mdsc, ci); 1737 __cap_delay_requeue(mdsc, ci);
1739 } 1738 }
1740out: 1739out:
1741 spin_unlock(&inode->i_lock); 1740 spin_unlock(&ci->i_ceph_lock);
1742out_unlocked: 1741out_unlocked:
1743 if (session && unlock_session) 1742 if (session && unlock_session)
1744 mutex_unlock(&session->s_mutex); 1743 mutex_unlock(&session->s_mutex);
@@ -1753,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1753 struct ceph_inode_info *ci = ceph_inode(inode); 1752 struct ceph_inode_info *ci = ceph_inode(inode);
1754 int i, ret = 1; 1753 int i, ret = 1;
1755 1754
1756 spin_lock(&inode->i_lock); 1755 spin_lock(&ci->i_ceph_lock);
1757 for (i = 0; i < CEPH_CAP_BITS; i++) 1756 for (i = 0; i < CEPH_CAP_BITS; i++)
1758 if ((ci->i_flushing_caps & (1 << i)) && 1757 if ((ci->i_flushing_caps & (1 << i)) &&
1759 ci->i_cap_flush_tid[i] <= tid) { 1758 ci->i_cap_flush_tid[i] <= tid) {
@@ -1761,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
1761 ret = 0; 1760 ret = 0;
1762 break; 1761 break;
1763 } 1762 }
1764 spin_unlock(&inode->i_lock); 1763 spin_unlock(&ci->i_ceph_lock);
1765 return ret; 1764 return ret;
1766} 1765}
1767 1766
@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
1868 struct ceph_mds_client *mdsc = 1867 struct ceph_mds_client *mdsc =
1869 ceph_sb_to_client(inode->i_sb)->mdsc; 1868 ceph_sb_to_client(inode->i_sb)->mdsc;
1870 1869
1871 spin_lock(&inode->i_lock); 1870 spin_lock(&ci->i_ceph_lock);
1872 if (__ceph_caps_dirty(ci)) 1871 if (__ceph_caps_dirty(ci))
1873 __cap_delay_requeue_front(mdsc, ci); 1872 __cap_delay_requeue_front(mdsc, ci);
1874 spin_unlock(&inode->i_lock); 1873 spin_unlock(&ci->i_ceph_lock);
1875 } 1874 }
1876 return err; 1875 return err;
1877} 1876}
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1894 struct inode *inode = &ci->vfs_inode; 1893 struct inode *inode = &ci->vfs_inode;
1895 struct ceph_cap *cap; 1894 struct ceph_cap *cap;
1896 1895
1897 spin_lock(&inode->i_lock); 1896 spin_lock(&ci->i_ceph_lock);
1898 cap = ci->i_auth_cap; 1897 cap = ci->i_auth_cap;
1899 if (cap && cap->session == session) { 1898 if (cap && cap->session == session) {
1900 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, 1899 dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1904 pr_err("%p auth cap %p not mds%d ???\n", inode, 1903 pr_err("%p auth cap %p not mds%d ???\n", inode,
1905 cap, session->s_mds); 1904 cap, session->s_mds);
1906 } 1905 }
1907 spin_unlock(&inode->i_lock); 1906 spin_unlock(&ci->i_ceph_lock);
1908 } 1907 }
1909} 1908}
1910 1909
@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1921 struct ceph_cap *cap; 1920 struct ceph_cap *cap;
1922 int delayed = 0; 1921 int delayed = 0;
1923 1922
1924 spin_lock(&inode->i_lock); 1923 spin_lock(&ci->i_ceph_lock);
1925 cap = ci->i_auth_cap; 1924 cap = ci->i_auth_cap;
1926 if (cap && cap->session == session) { 1925 if (cap && cap->session == session) {
1927 dout("kick_flushing_caps %p cap %p %s\n", inode, 1926 dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
1932 cap->issued | cap->implemented, 1931 cap->issued | cap->implemented,
1933 ci->i_flushing_caps, NULL); 1932 ci->i_flushing_caps, NULL);
1934 if (delayed) { 1933 if (delayed) {
1935 spin_lock(&inode->i_lock); 1934 spin_lock(&ci->i_ceph_lock);
1936 __cap_delay_requeue(mdsc, ci); 1935 __cap_delay_requeue(mdsc, ci);
1937 spin_unlock(&inode->i_lock); 1936 spin_unlock(&ci->i_ceph_lock);
1938 } 1937 }
1939 } else { 1938 } else {
1940 pr_err("%p auth cap %p not mds%d ???\n", inode, 1939 pr_err("%p auth cap %p not mds%d ???\n", inode,
1941 cap, session->s_mds); 1940 cap, session->s_mds);
1942 spin_unlock(&inode->i_lock); 1941 spin_unlock(&ci->i_ceph_lock);
1943 } 1942 }
1944 } 1943 }
1945} 1944}
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1952 struct ceph_cap *cap; 1951 struct ceph_cap *cap;
1953 int delayed = 0; 1952 int delayed = 0;
1954 1953
1955 spin_lock(&inode->i_lock); 1954 spin_lock(&ci->i_ceph_lock);
1956 cap = ci->i_auth_cap; 1955 cap = ci->i_auth_cap;
1957 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode, 1956 dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
1958 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq); 1957 ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1964 cap->issued | cap->implemented, 1963 cap->issued | cap->implemented,
1965 ci->i_flushing_caps, NULL); 1964 ci->i_flushing_caps, NULL);
1966 if (delayed) { 1965 if (delayed) {
1967 spin_lock(&inode->i_lock); 1966 spin_lock(&ci->i_ceph_lock);
1968 __cap_delay_requeue(mdsc, ci); 1967 __cap_delay_requeue(mdsc, ci);
1969 spin_unlock(&inode->i_lock); 1968 spin_unlock(&ci->i_ceph_lock);
1970 } 1969 }
1971 } else { 1970 } else {
1972 spin_unlock(&inode->i_lock); 1971 spin_unlock(&ci->i_ceph_lock);
1973 } 1972 }
1974} 1973}
1975 1974
@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
1978 * Take references to capabilities we hold, so that we don't release 1977 * Take references to capabilities we hold, so that we don't release
1979 * them to the MDS prematurely. 1978 * them to the MDS prematurely.
1980 * 1979 *
1981 * Protected by i_lock. 1980 * Protected by i_ceph_lock.
1982 */ 1981 */
1983static void __take_cap_refs(struct ceph_inode_info *ci, int got) 1982static void __take_cap_refs(struct ceph_inode_info *ci, int got)
1984{ 1983{
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2016 2015
2017 dout("get_cap_refs %p need %s want %s\n", inode, 2016 dout("get_cap_refs %p need %s want %s\n", inode,
2018 ceph_cap_string(need), ceph_cap_string(want)); 2017 ceph_cap_string(need), ceph_cap_string(want));
2019 spin_lock(&inode->i_lock); 2018 spin_lock(&ci->i_ceph_lock);
2020 2019
2021 /* make sure file is actually open */ 2020 /* make sure file is actually open */
2022 file_wanted = __ceph_caps_file_wanted(ci); 2021 file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
2077 ceph_cap_string(have), ceph_cap_string(need)); 2076 ceph_cap_string(have), ceph_cap_string(need));
2078 } 2077 }
2079out: 2078out:
2080 spin_unlock(&inode->i_lock); 2079 spin_unlock(&ci->i_ceph_lock);
2081 dout("get_cap_refs %p ret %d got %s\n", inode, 2080 dout("get_cap_refs %p ret %d got %s\n", inode,
2082 ret, ceph_cap_string(*got)); 2081 ret, ceph_cap_string(*got));
2083 return ret; 2082 return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2094 int check = 0; 2093 int check = 0;
2095 2094
2096 /* do we need to explicitly request a larger max_size? */ 2095 /* do we need to explicitly request a larger max_size? */
2097 spin_lock(&inode->i_lock); 2096 spin_lock(&ci->i_ceph_lock);
2098 if ((endoff >= ci->i_max_size || 2097 if ((endoff >= ci->i_max_size ||
2099 endoff > (inode->i_size << 1)) && 2098 endoff > (inode->i_size << 1)) &&
2100 endoff > ci->i_wanted_max_size) { 2099 endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
2103 ci->i_wanted_max_size = endoff; 2102 ci->i_wanted_max_size = endoff;
2104 check = 1; 2103 check = 1;
2105 } 2104 }
2106 spin_unlock(&inode->i_lock); 2105 spin_unlock(&ci->i_ceph_lock);
2107 if (check) 2106 if (check)
2108 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2107 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
2109} 2108}
@@ -2140,9 +2139,9 @@ retry:
2140 */ 2139 */
2141void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps) 2140void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
2142{ 2141{
2143 spin_lock(&ci->vfs_inode.i_lock); 2142 spin_lock(&ci->i_ceph_lock);
2144 __take_cap_refs(ci, caps); 2143 __take_cap_refs(ci, caps);
2145 spin_unlock(&ci->vfs_inode.i_lock); 2144 spin_unlock(&ci->i_ceph_lock);
2146} 2145}
2147 2146
2148/* 2147/*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2160 int last = 0, put = 0, flushsnaps = 0, wake = 0; 2159 int last = 0, put = 0, flushsnaps = 0, wake = 0;
2161 struct ceph_cap_snap *capsnap; 2160 struct ceph_cap_snap *capsnap;
2162 2161
2163 spin_lock(&inode->i_lock); 2162 spin_lock(&ci->i_ceph_lock);
2164 if (had & CEPH_CAP_PIN) 2163 if (had & CEPH_CAP_PIN)
2165 --ci->i_pin_ref; 2164 --ci->i_pin_ref;
2166 if (had & CEPH_CAP_FILE_RD) 2165 if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
2193 } 2192 }
2194 } 2193 }
2195 } 2194 }
2196 spin_unlock(&inode->i_lock); 2195 spin_unlock(&ci->i_ceph_lock);
2197 2196
2198 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), 2197 dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
2199 last ? " last" : "", put ? " put" : ""); 2198 last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2225 int found = 0; 2224 int found = 0;
2226 struct ceph_cap_snap *capsnap = NULL; 2225 struct ceph_cap_snap *capsnap = NULL;
2227 2226
2228 spin_lock(&inode->i_lock); 2227 spin_lock(&ci->i_ceph_lock);
2229 ci->i_wrbuffer_ref -= nr; 2228 ci->i_wrbuffer_ref -= nr;
2230 last = !ci->i_wrbuffer_ref; 2229 last = !ci->i_wrbuffer_ref;
2231 2230
@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2274 } 2273 }
2275 } 2274 }
2276 2275
2277 spin_unlock(&inode->i_lock); 2276 spin_unlock(&ci->i_ceph_lock);
2278 2277
2279 if (last) { 2278 if (last) {
2280 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 2279 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
2291 * Handle a cap GRANT message from the MDS. (Note that a GRANT may 2290 * Handle a cap GRANT message from the MDS. (Note that a GRANT may
2292 * actually be a revocation if it specifies a smaller cap set.) 2291 * actually be a revocation if it specifies a smaller cap set.)
2293 * 2292 *
2294 * caller holds s_mutex and i_lock, we drop both. 2293 * caller holds s_mutex and i_ceph_lock, we drop both.
2295 * 2294 *
2296 * return value: 2295 * return value:
2297 * 0 - ok 2296 * 0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2302 struct ceph_mds_session *session, 2301 struct ceph_mds_session *session,
2303 struct ceph_cap *cap, 2302 struct ceph_cap *cap,
2304 struct ceph_buffer *xattr_buf) 2303 struct ceph_buffer *xattr_buf)
2305 __releases(inode->i_lock) 2304 __releases(ci->i_ceph_lock)
2306{ 2305{
2307 struct ceph_inode_info *ci = ceph_inode(inode); 2306 struct ceph_inode_info *ci = ceph_inode(inode);
2308 int mds = session->s_mds; 2307 int mds = session->s_mds;
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
2453 } 2452 }
2454 BUG_ON(cap->issued & ~cap->implemented); 2453 BUG_ON(cap->issued & ~cap->implemented);
2455 2454
2456 spin_unlock(&inode->i_lock); 2455 spin_unlock(&ci->i_ceph_lock);
2457 if (writeback) 2456 if (writeback)
2458 /* 2457 /*
2459 * queue inode for writeback: we can't actually call 2458 * queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2483 struct ceph_mds_caps *m, 2482 struct ceph_mds_caps *m,
2484 struct ceph_mds_session *session, 2483 struct ceph_mds_session *session,
2485 struct ceph_cap *cap) 2484 struct ceph_cap *cap)
2486 __releases(inode->i_lock) 2485 __releases(ci->i_ceph_lock)
2487{ 2486{
2488 struct ceph_inode_info *ci = ceph_inode(inode); 2487 struct ceph_inode_info *ci = ceph_inode(inode);
2489 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 2488 struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
2539 wake_up_all(&ci->i_cap_wq); 2538 wake_up_all(&ci->i_cap_wq);
2540 2539
2541out: 2540out:
2542 spin_unlock(&inode->i_lock); 2541 spin_unlock(&ci->i_ceph_lock);
2543 if (drop) 2542 if (drop)
2544 iput(inode); 2543 iput(inode);
2545} 2544}
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2562 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n", 2561 dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
2563 inode, ci, session->s_mds, follows); 2562 inode, ci, session->s_mds, follows);
2564 2563
2565 spin_lock(&inode->i_lock); 2564 spin_lock(&ci->i_ceph_lock);
2566 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) { 2565 list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
2567 if (capsnap->follows == follows) { 2566 if (capsnap->follows == follows) {
2568 if (capsnap->flush_tid != flush_tid) { 2567 if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2585 capsnap, capsnap->follows); 2584 capsnap, capsnap->follows);
2586 } 2585 }
2587 } 2586 }
2588 spin_unlock(&inode->i_lock); 2587 spin_unlock(&ci->i_ceph_lock);
2589 if (drop) 2588 if (drop)
2590 iput(inode); 2589 iput(inode);
2591} 2590}
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
2598static void handle_cap_trunc(struct inode *inode, 2597static void handle_cap_trunc(struct inode *inode,
2599 struct ceph_mds_caps *trunc, 2598 struct ceph_mds_caps *trunc,
2600 struct ceph_mds_session *session) 2599 struct ceph_mds_session *session)
2601 __releases(inode->i_lock) 2600 __releases(ci->i_ceph_lock)
2602{ 2601{
2603 struct ceph_inode_info *ci = ceph_inode(inode); 2602 struct ceph_inode_info *ci = ceph_inode(inode);
2604 int mds = session->s_mds; 2603 int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
2617 inode, mds, seq, truncate_size, truncate_seq); 2616 inode, mds, seq, truncate_size, truncate_seq);
2618 queue_trunc = ceph_fill_file_size(inode, issued, 2617 queue_trunc = ceph_fill_file_size(inode, issued,
2619 truncate_seq, truncate_size, size); 2618 truncate_seq, truncate_size, size);
2620 spin_unlock(&inode->i_lock); 2619 spin_unlock(&ci->i_ceph_lock);
2621 2620
2622 if (queue_trunc) 2621 if (queue_trunc)
2623 ceph_queue_vmtruncate(inode); 2622 ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2646 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n", 2645 dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
2647 inode, ci, mds, mseq); 2646 inode, ci, mds, mseq);
2648 2647
2649 spin_lock(&inode->i_lock); 2648 spin_lock(&ci->i_ceph_lock);
2650 2649
2651 /* make sure we haven't seen a higher mseq */ 2650 /* make sure we haven't seen a higher mseq */
2652 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { 2651 for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
2690 } 2689 }
2691 /* else, we already released it */ 2690 /* else, we already released it */
2692 2691
2693 spin_unlock(&inode->i_lock); 2692 spin_unlock(&ci->i_ceph_lock);
2694} 2693}
2695 2694
2696/* 2695/*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
2745 up_read(&mdsc->snap_rwsem); 2744 up_read(&mdsc->snap_rwsem);
2746 2745
2747 /* make sure we re-request max_size, if necessary */ 2746 /* make sure we re-request max_size, if necessary */
2748 spin_lock(&inode->i_lock); 2747 spin_lock(&ci->i_ceph_lock);
2749 ci->i_requested_max_size = 0; 2748 ci->i_requested_max_size = 0;
2750 spin_unlock(&inode->i_lock); 2749 spin_unlock(&ci->i_ceph_lock);
2751} 2750}
2752 2751
2753/* 2752/*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2762 struct ceph_mds_client *mdsc = session->s_mdsc; 2761 struct ceph_mds_client *mdsc = session->s_mdsc;
2763 struct super_block *sb = mdsc->fsc->sb; 2762 struct super_block *sb = mdsc->fsc->sb;
2764 struct inode *inode; 2763 struct inode *inode;
2764 struct ceph_inode_info *ci;
2765 struct ceph_cap *cap; 2765 struct ceph_cap *cap;
2766 struct ceph_mds_caps *h; 2766 struct ceph_mds_caps *h;
2767 int mds = session->s_mds; 2767 int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2815 2815
2816 /* lookup ino */ 2816 /* lookup ino */
2817 inode = ceph_find_inode(sb, vino); 2817 inode = ceph_find_inode(sb, vino);
2818 ci = ceph_inode(inode);
2818 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, 2819 dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
2819 vino.snap, inode); 2820 vino.snap, inode);
2820 if (!inode) { 2821 if (!inode) {
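
ceph_handle_caps() now resolves ci once, right after the inode lookup, so the later lock and unlock sites can use ci->i_ceph_lock directly. ceph_inode() is the usual container_of-style accessor from the embedded vfs_inode back to the filesystem's private inode structure; a sketch of that idiom with invented names:

        #include <linux/fs.h>
        #include <linux/kernel.h>
        #include <linux/spinlock.h>

        struct sketch_inode_info {
                spinlock_t i_ceph_lock;
                struct inode vfs_inode;         /* embedded, not pointed to */
        };

        /* map a VFS inode back to the private structure that embeds it */
        static inline struct sketch_inode_info *sketch_inode(struct inode *inode)
        {
                return container_of(inode, struct sketch_inode_info, vfs_inode);
        }
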
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2844 } 2845 }
2845 2846
2846 /* the rest require a cap */ 2847 /* the rest require a cap */
2847 spin_lock(&inode->i_lock); 2848 spin_lock(&ci->i_ceph_lock);
2848 cap = __get_cap_for_mds(ceph_inode(inode), mds); 2849 cap = __get_cap_for_mds(ceph_inode(inode), mds);
2849 if (!cap) { 2850 if (!cap) {
2850 dout(" no cap on %p ino %llx.%llx from mds%d\n", 2851 dout(" no cap on %p ino %llx.%llx from mds%d\n",
2851 inode, ceph_ino(inode), ceph_snap(inode), mds); 2852 inode, ceph_ino(inode), ceph_snap(inode), mds);
2852 spin_unlock(&inode->i_lock); 2853 spin_unlock(&ci->i_ceph_lock);
2853 goto flush_cap_releases; 2854 goto flush_cap_releases;
2854 } 2855 }
2855 2856
2856 /* note that each of these drops i_lock for us */ 2857 /* note that each of these drops i_ceph_lock for us */
2857 switch (op) { 2858 switch (op) {
2858 case CEPH_CAP_OP_REVOKE: 2859 case CEPH_CAP_OP_REVOKE:
2859 case CEPH_CAP_OP_GRANT: 2860 case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
2869 break; 2870 break;
2870 2871
2871 default: 2872 default:
2872 spin_unlock(&inode->i_lock); 2873 spin_unlock(&ci->i_ceph_lock);
2873 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op, 2874 pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
2874 ceph_cap_op_name(op)); 2875 ceph_cap_op_name(op));
2875 } 2876 }
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
2962 struct inode *inode = &ci->vfs_inode; 2963 struct inode *inode = &ci->vfs_inode;
2963 int last = 0; 2964 int last = 0;
2964 2965
2965 spin_lock(&inode->i_lock); 2966 spin_lock(&ci->i_ceph_lock);
2966 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode, 2967 dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
2967 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1); 2968 ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
2968 BUG_ON(ci->i_nr_by_mode[fmode] == 0); 2969 BUG_ON(ci->i_nr_by_mode[fmode] == 0);
2969 if (--ci->i_nr_by_mode[fmode] == 0) 2970 if (--ci->i_nr_by_mode[fmode] == 0)
2970 last++; 2971 last++;
2971 spin_unlock(&inode->i_lock); 2972 spin_unlock(&ci->i_ceph_lock);
2972 2973
2973 if (last && ci->i_vino.snap == CEPH_NOSNAP) 2974 if (last && ci->i_vino.snap == CEPH_NOSNAP)
2974 ceph_check_caps(ci, 0, NULL); 2975 ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
2991 int used, dirty; 2992 int used, dirty;
2992 int ret = 0; 2993 int ret = 0;
2993 2994
2994 spin_lock(&inode->i_lock); 2995 spin_lock(&ci->i_ceph_lock);
2995 used = __ceph_caps_used(ci); 2996 used = __ceph_caps_used(ci);
2996 dirty = __ceph_caps_dirty(ci); 2997 dirty = __ceph_caps_dirty(ci);
2997 2998
@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
3046 inode, cap, ceph_cap_string(cap->issued)); 3047 inode, cap, ceph_cap_string(cap->issued));
3047 } 3048 }
3048 } 3049 }
3049 spin_unlock(&inode->i_lock); 3050 spin_unlock(&ci->i_ceph_lock);
3050 return ret; 3051 return ret;
3051} 3052}
3052 3053
@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
3061 3062
3062 /* 3063 /*
3063 * force an record for the directory caps if we have a dentry lease. 3064 * force an record for the directory caps if we have a dentry lease.
3064 * this is racy (can't take i_lock and d_lock together), but it 3065 * this is racy (can't take i_ceph_lock and d_lock together), but it
3065 * doesn't have to be perfect; the mds will revoke anything we don't 3066 * doesn't have to be perfect; the mds will revoke anything we don't
3066 * release. 3067 * release.
3067 */ 3068 */
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index bca3948e9dbf..74fd74719dc2 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -281,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
281 } 281 }
282 282
283 /* can we use the dcache? */ 283 /* can we use the dcache? */
284 spin_lock(&inode->i_lock); 284 spin_lock(&ci->i_ceph_lock);
285 if ((filp->f_pos == 2 || fi->dentry) && 285 if ((filp->f_pos == 2 || fi->dentry) &&
286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && 286 !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
287 ceph_snap(inode) != CEPH_SNAPDIR && 287 ceph_snap(inode) != CEPH_SNAPDIR &&
288 ceph_dir_test_complete(inode) && 288 ceph_dir_test_complete(inode) &&
289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { 289 __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
290 spin_unlock(&inode->i_lock); 290 spin_unlock(&ci->i_ceph_lock);
291 err = __dcache_readdir(filp, dirent, filldir); 291 err = __dcache_readdir(filp, dirent, filldir);
292 if (err != -EAGAIN) 292 if (err != -EAGAIN)
293 return err; 293 return err;
294 } else { 294 } else {
295 spin_unlock(&inode->i_lock); 295 spin_unlock(&ci->i_ceph_lock);
296 } 296 }
297 if (fi->dentry) { 297 if (fi->dentry) {
298 err = note_last_dentry(fi, fi->dentry->d_name.name, 298 err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -428,12 +428,12 @@ more:
428 * were released during the whole readdir, and we should have 428 * were released during the whole readdir, and we should have
429 * the complete dir contents in our cache. 429 * the complete dir contents in our cache.
430 */ 430 */
431 spin_lock(&inode->i_lock); 431 spin_lock(&ci->i_ceph_lock);
432 if (ci->i_release_count == fi->dir_release_count) { 432 if (ci->i_release_count == fi->dir_release_count) {
433 ceph_dir_set_complete(inode); 433 ceph_dir_set_complete(inode);
434 ci->i_max_offset = filp->f_pos; 434 ci->i_max_offset = filp->f_pos;
435 } 435 }
436 spin_unlock(&inode->i_lock); 436 spin_unlock(&ci->i_ceph_lock);
437 437
438 dout("readdir %p filp %p done.\n", inode, filp); 438 dout("readdir %p filp %p done.\n", inode, filp);
439 return 0; 439 return 0;
@@ -607,7 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
607 struct ceph_inode_info *ci = ceph_inode(dir); 607 struct ceph_inode_info *ci = ceph_inode(dir);
608 struct ceph_dentry_info *di = ceph_dentry(dentry); 608 struct ceph_dentry_info *di = ceph_dentry(dentry);
609 609
610 spin_lock(&dir->i_lock); 610 spin_lock(&ci->i_ceph_lock);
611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); 611 dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
612 if (strncmp(dentry->d_name.name, 612 if (strncmp(dentry->d_name.name,
613 fsc->mount_options->snapdir_name, 613 fsc->mount_options->snapdir_name,
@@ -615,13 +615,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
615 !is_root_ceph_dentry(dir, dentry) && 615 !is_root_ceph_dentry(dir, dentry) &&
616 ceph_dir_test_complete(dir) && 616 ceph_dir_test_complete(dir) &&
617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) { 617 (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
618 spin_unlock(&dir->i_lock); 618 spin_unlock(&ci->i_ceph_lock);
619 dout(" dir %p complete, -ENOENT\n", dir); 619 dout(" dir %p complete, -ENOENT\n", dir);
620 d_add(dentry, NULL); 620 d_add(dentry, NULL);
621 di->lease_shared_gen = ci->i_shared_gen; 621 di->lease_shared_gen = ci->i_shared_gen;
622 return NULL; 622 return NULL;
623 } 623 }
624 spin_unlock(&dir->i_lock); 624 spin_unlock(&ci->i_ceph_lock);
625 } 625 }
626 626
627 op = ceph_snap(dir) == CEPH_SNAPDIR ? 627 op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -666,7 +666,7 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
666} 666}
667 667
668static int ceph_mknod(struct inode *dir, struct dentry *dentry, 668static int ceph_mknod(struct inode *dir, struct dentry *dentry,
669 int mode, dev_t rdev) 669 umode_t mode, dev_t rdev)
670{ 670{
671 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 671 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
672 struct ceph_mds_client *mdsc = fsc->mdsc; 672 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -676,7 +676,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
676 if (ceph_snap(dir) != CEPH_NOSNAP) 676 if (ceph_snap(dir) != CEPH_NOSNAP)
677 return -EROFS; 677 return -EROFS;
678 678
679 dout("mknod in dir %p dentry %p mode 0%o rdev %d\n", 679 dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
680 dir, dentry, mode, rdev); 680 dir, dentry, mode, rdev);
681 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS); 681 req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
682 if (IS_ERR(req)) { 682 if (IS_ERR(req)) {
@@ -699,7 +699,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
699 return err; 699 return err;
700} 700}
701 701
702static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, 702static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
703 struct nameidata *nd) 703 struct nameidata *nd)
704{ 704{
705 dout("create in dir %p dentry %p name '%.*s'\n", 705 dout("create in dir %p dentry %p name '%.*s'\n",
@@ -753,7 +753,7 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
753 return err; 753 return err;
754} 754}
755 755
756static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) 756static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
757{ 757{
758 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); 758 struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
759 struct ceph_mds_client *mdsc = fsc->mdsc; 759 struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -767,7 +767,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode)
767 dout("mksnap dir %p snap '%.*s' dn %p\n", dir, 767 dout("mksnap dir %p snap '%.*s' dn %p\n", dir,
768 dentry->d_name.len, dentry->d_name.name, dentry); 768 dentry->d_name.len, dentry->d_name.name, dentry);
769 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 769 } else if (ceph_snap(dir) == CEPH_NOSNAP) {
770 dout("mkdir dir %p dn %p mode 0%o\n", dir, dentry, mode); 770 dout("mkdir dir %p dn %p mode 0%ho\n", dir, dentry, mode);
771 op = CEPH_MDS_OP_MKDIR; 771 op = CEPH_MDS_OP_MKDIR;
772 } else { 772 } else {
773 goto out; 773 goto out;
@@ -841,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
841 struct ceph_inode_info *ci = ceph_inode(inode); 841 struct ceph_inode_info *ci = ceph_inode(inode);
842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL; 842 int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
843 843
844 spin_lock(&inode->i_lock); 844 spin_lock(&ci->i_ceph_lock);
845 if (inode->i_nlink == 1) { 845 if (inode->i_nlink == 1) {
846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN); 846 drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
847 ci->i_ceph_flags |= CEPH_I_NODELAY; 847 ci->i_ceph_flags |= CEPH_I_NODELAY;
848 } 848 }
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 return drop; 850 return drop;
851} 851}
852 852
@@ -870,7 +870,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
870 } else if (ceph_snap(dir) == CEPH_NOSNAP) { 870 } else if (ceph_snap(dir) == CEPH_NOSNAP) {
871 dout("unlink/rmdir dir %p dn %p inode %p\n", 871 dout("unlink/rmdir dir %p dn %p inode %p\n",
872 dir, dentry, inode); 872 dir, dentry, inode);
873 op = ((dentry->d_inode->i_mode & S_IFMT) == S_IFDIR) ? 873 op = S_ISDIR(dentry->d_inode->i_mode) ?
874 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; 874 CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
875 } else 875 } else
876 goto out; 876 goto out;
@@ -1015,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
1015 struct ceph_dentry_info *di = ceph_dentry(dentry); 1015 struct ceph_dentry_info *di = ceph_dentry(dentry);
1016 int valid = 0; 1016 int valid = 0;
1017 1017
1018 spin_lock(&dir->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 if (ci->i_shared_gen == di->lease_shared_gen) 1019 if (ci->i_shared_gen == di->lease_shared_gen)
1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1); 1020 valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
1021 spin_unlock(&dir->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n", 1022 dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
1023 dir, (unsigned)ci->i_shared_gen, dentry, 1023 dir, (unsigned)ci->i_shared_gen, dentry,
1024 (unsigned)di->lease_shared_gen, valid); 1024 (unsigned)di->lease_shared_gen, valid);
@@ -1094,42 +1094,19 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
1094/* 1094/*
1095 * Set/clear/test dir complete flag on the dir's dentry. 1095 * Set/clear/test dir complete flag on the dir's dentry.
1096 */ 1096 */
1097static struct dentry * __d_find_any_alias(struct inode *inode)
1098{
1099 struct dentry *alias;
1100
1101 if (list_empty(&inode->i_dentry))
1102 return NULL;
1103 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1104 return alias;
1105}
1106
1107void ceph_dir_set_complete(struct inode *inode) 1097void ceph_dir_set_complete(struct inode *inode)
1108{ 1098{
1109 struct dentry *dentry = __d_find_any_alias(inode); 1099 /* not yet implemented */
1110
1111 if (dentry && ceph_dentry(dentry)) {
1112 dout(" marking %p (%p) complete\n", inode, dentry);
1113 set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1114 }
1115} 1100}
1116 1101
1117void ceph_dir_clear_complete(struct inode *inode) 1102void ceph_dir_clear_complete(struct inode *inode)
1118{ 1103{
1119 struct dentry *dentry = __d_find_any_alias(inode); 1104 /* not yet implemented */
1120
1121 if (dentry && ceph_dentry(dentry)) {
1122 dout(" marking %p (%p) NOT complete\n", inode, dentry);
1123 clear_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1124 }
1125} 1105}
1126 1106
1127bool ceph_dir_test_complete(struct inode *inode) 1107bool ceph_dir_test_complete(struct inode *inode)
1128{ 1108{
1129 struct dentry *dentry = __d_find_any_alias(inode); 1109 /* not yet implemented */
1130
1131 if (dentry && ceph_dentry(dentry))
1132 return test_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
1133 return false; 1110 return false;
1134} 1111}
1135 1112
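
The last dir.c hunk stubs out D_COMPLETE handling: set and clear become no-ops and the test always reports false, so the readdir and lookup shortcuts gated on it above simply stop being taken and the client asks the MDS instead. The removed implementation was plain atomic bit manipulation on the dentry's flags word; a sketch of that idiom, with an illustrative bit number and names:

        #include <linux/bitops.h>
        #include <linux/types.h>

        #define SKETCH_D_COMPLETE       0       /* bit index in the flags word */

        static void sketch_set_complete(unsigned long *flags)
        {
                set_bit(SKETCH_D_COMPLETE, flags);      /* atomic set */
        }

        static void sketch_clear_complete(unsigned long *flags)
        {
                clear_bit(SKETCH_D_COMPLETE, flags);    /* atomic clear */
        }

        static bool sketch_test_complete(const unsigned long *flags)
        {
                return test_bit(SKETCH_D_COMPLETE, flags);
        }
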
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ce549d31eeb7..ed72428d9c75 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
147 147
148 /* trivially open snapdir */ 148 /* trivially open snapdir */
149 if (ceph_snap(inode) == CEPH_SNAPDIR) { 149 if (ceph_snap(inode) == CEPH_SNAPDIR) {
150 spin_lock(&inode->i_lock); 150 spin_lock(&ci->i_ceph_lock);
151 __ceph_get_fmode(ci, fmode); 151 __ceph_get_fmode(ci, fmode);
152 spin_unlock(&inode->i_lock); 152 spin_unlock(&ci->i_ceph_lock);
153 return ceph_init_file(inode, file, fmode); 153 return ceph_init_file(inode, file, fmode);
154 } 154 }
155 155
@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
158 * write) or any MDS (for read). Update wanted set 158 * write) or any MDS (for read). Update wanted set
159 * asynchronously. 159 * asynchronously.
160 */ 160 */
161 spin_lock(&inode->i_lock); 161 spin_lock(&ci->i_ceph_lock);
162 if (__ceph_is_any_real_caps(ci) && 162 if (__ceph_is_any_real_caps(ci) &&
163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { 163 (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
164 int mds_wanted = __ceph_caps_mds_wanted(ci); 164 int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
168 inode, fmode, ceph_cap_string(wanted), 168 inode, fmode, ceph_cap_string(wanted),
169 ceph_cap_string(issued)); 169 ceph_cap_string(issued));
170 __ceph_get_fmode(ci, fmode); 170 __ceph_get_fmode(ci, fmode);
171 spin_unlock(&inode->i_lock); 171 spin_unlock(&ci->i_ceph_lock);
172 172
173 /* adjust wanted? */ 173 /* adjust wanted? */
174 if ((issued & wanted) != wanted && 174 if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
180 } else if (ceph_snap(inode) != CEPH_NOSNAP && 180 } else if (ceph_snap(inode) != CEPH_NOSNAP &&
181 (ci->i_snap_caps & wanted) == wanted) { 181 (ci->i_snap_caps & wanted) == wanted) {
182 __ceph_get_fmode(ci, fmode); 182 __ceph_get_fmode(ci, fmode);
183 spin_unlock(&inode->i_lock); 183 spin_unlock(&ci->i_ceph_lock);
184 return ceph_init_file(inode, file, fmode); 184 return ceph_init_file(inode, file, fmode);
185 } 185 }
186 spin_unlock(&inode->i_lock); 186 spin_unlock(&ci->i_ceph_lock);
187 187
188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted)); 188 dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
189 req = prepare_open_request(inode->i_sb, flags, 0); 189 req = prepare_open_request(inode->i_sb, flags, 0);
@@ -743,9 +743,9 @@ retry_snap:
743 */ 743 */
744 int dirty; 744 int dirty;
745 745
746 spin_lock(&inode->i_lock); 746 spin_lock(&ci->i_ceph_lock);
747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 747 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
748 spin_unlock(&inode->i_lock); 748 spin_unlock(&ci->i_ceph_lock);
749 ceph_put_cap_refs(ci, got); 749 ceph_put_cap_refs(ci, got);
750 750
751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 751 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
@@ -764,9 +764,9 @@ retry_snap:
764 764
765 if (ret >= 0) { 765 if (ret >= 0) {
766 int dirty; 766 int dirty;
767 spin_lock(&inode->i_lock); 767 spin_lock(&ci->i_ceph_lock);
768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); 768 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
769 spin_unlock(&inode->i_lock); 769 spin_unlock(&ci->i_ceph_lock);
770 if (dirty) 770 if (dirty)
771 __mark_inode_dirty(inode, dirty); 771 __mark_inode_dirty(inode, dirty);
772 } 772 }
@@ -797,7 +797,8 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
797 797
798 mutex_lock(&inode->i_mutex); 798 mutex_lock(&inode->i_mutex);
799 __ceph_do_pending_vmtruncate(inode); 799 __ceph_do_pending_vmtruncate(inode);
800 if (origin != SEEK_CUR || origin != SEEK_SET) { 800
801 if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
801 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 802 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
802 if (ret < 0) { 803 if (ret < 0) {
803 offset = ret; 804 offset = ret;
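
The llseek hunk fixes a predicate that could never be false: since SEEK_SET and SEEK_CUR are distinct values, origin != SEEK_CUR || origin != SEEK_SET holds for every origin, so the getattr ran on every seek. The replacement spells out the origins that actually need an up-to-date size from the MDS. A sketch of the corrected test as a standalone helper (the helper itself is invented):

        #include <linux/fs.h>           /* SEEK_SET, SEEK_CUR, SEEK_END, SEEK_DATA, SEEK_HOLE */
        #include <linux/types.h>

        /* only these origins depend on i_size, so only they need the getattr */
        static bool sketch_seek_needs_size(int origin)
        {
                return origin == SEEK_END || origin == SEEK_DATA ||
                       origin == SEEK_HOLE;
        }
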
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 116f36502f17..25283e7a37f8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -297,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
297 297
298 dout("alloc_inode %p\n", &ci->vfs_inode); 298 dout("alloc_inode %p\n", &ci->vfs_inode);
299 299
300 spin_lock_init(&ci->i_ceph_lock);
301
300 ci->i_version = 0; 302 ci->i_version = 0;
301 ci->i_time_warp_seq = 0; 303 ci->i_time_warp_seq = 0;
302 ci->i_ceph_flags = 0; 304 ci->i_ceph_flags = 0;
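
Since i_ceph_lock is a new lock rather than one the VFS already initializes, ceph_alloc_inode() has to run spin_lock_init() on it before the inode is used anywhere else, which is exactly what the added line above does. A sketch of the allocation-time initialization, with an invented structure and a plain kzalloc() in place of the inode cache:

        #include <linux/gfp.h>
        #include <linux/slab.h>
        #include <linux/spinlock.h>

        struct sketch_info {
                spinlock_t i_ceph_lock;
                int i_wrbuffer_ref;
        };

        static struct sketch_info *sketch_alloc_info(void)
        {
                struct sketch_info *ci;

                ci = kzalloc(sizeof(*ci), GFP_NOFS);
                if (!ci)
                        return NULL;
                spin_lock_init(&ci->i_ceph_lock);       /* must happen before first use */
                return ci;
        }
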
@@ -382,7 +384,6 @@ static void ceph_i_callback(struct rcu_head *head)
382 struct inode *inode = container_of(head, struct inode, i_rcu); 384 struct inode *inode = container_of(head, struct inode, i_rcu);
383 struct ceph_inode_info *ci = ceph_inode(inode); 385 struct ceph_inode_info *ci = ceph_inode(inode);
384 386
385 INIT_LIST_HEAD(&inode->i_dentry);
386 kmem_cache_free(ceph_inode_cachep, ci); 387 kmem_cache_free(ceph_inode_cachep, ci);
387} 388}
388 389
@@ -583,7 +584,7 @@ static int fill_inode(struct inode *inode,
583 iinfo->xattr_len); 584 iinfo->xattr_len);
584 } 585 }
585 586
586 spin_lock(&inode->i_lock); 587 spin_lock(&ci->i_ceph_lock);
587 588
588 /* 589 /*
589 * provided version will be odd if inode value is projected, 590 * provided version will be odd if inode value is projected,
@@ -680,7 +681,7 @@ static int fill_inode(struct inode *inode,
680 char *sym; 681 char *sym;
681 682
682 BUG_ON(symlen != inode->i_size); 683 BUG_ON(symlen != inode->i_size);
683 spin_unlock(&inode->i_lock); 684 spin_unlock(&ci->i_ceph_lock);
684 685
685 err = -ENOMEM; 686 err = -ENOMEM;
686 sym = kmalloc(symlen+1, GFP_NOFS); 687 sym = kmalloc(symlen+1, GFP_NOFS);
@@ -689,7 +690,7 @@ static int fill_inode(struct inode *inode,
689 memcpy(sym, iinfo->symlink, symlen); 690 memcpy(sym, iinfo->symlink, symlen);
690 sym[symlen] = 0; 691 sym[symlen] = 0;
691 692
692 spin_lock(&inode->i_lock); 693 spin_lock(&ci->i_ceph_lock);
693 if (!ci->i_symlink) 694 if (!ci->i_symlink)
694 ci->i_symlink = sym; 695 ci->i_symlink = sym;
695 else 696 else
@@ -715,7 +716,7 @@ static int fill_inode(struct inode *inode,
715 } 716 }
716 717
717no_change: 718no_change:
718 spin_unlock(&inode->i_lock); 719 spin_unlock(&ci->i_ceph_lock);
719 720
720 /* queue truncate if we saw i_size decrease */ 721 /* queue truncate if we saw i_size decrease */
721 if (queue_trunc) 722 if (queue_trunc)
@@ -750,13 +751,13 @@ no_change:
750 info->cap.flags, 751 info->cap.flags,
751 caps_reservation); 752 caps_reservation);
752 } else { 753 } else {
753 spin_lock(&inode->i_lock); 754 spin_lock(&ci->i_ceph_lock);
754 dout(" %p got snap_caps %s\n", inode, 755 dout(" %p got snap_caps %s\n", inode,
755 ceph_cap_string(le32_to_cpu(info->cap.caps))); 756 ceph_cap_string(le32_to_cpu(info->cap.caps)));
756 ci->i_snap_caps |= le32_to_cpu(info->cap.caps); 757 ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
757 if (cap_fmode >= 0) 758 if (cap_fmode >= 0)
758 __ceph_get_fmode(ci, cap_fmode); 759 __ceph_get_fmode(ci, cap_fmode);
759 spin_unlock(&inode->i_lock); 760 spin_unlock(&ci->i_ceph_lock);
760 } 761 }
761 } else if (cap_fmode >= 0) { 762 } else if (cap_fmode >= 0) {
762 pr_warning("mds issued no caps on %llx.%llx\n", 763 pr_warning("mds issued no caps on %llx.%llx\n",
@@ -849,19 +850,20 @@ static void ceph_set_dentry_offset(struct dentry *dn)
849{ 850{
850 struct dentry *dir = dn->d_parent; 851 struct dentry *dir = dn->d_parent;
851 struct inode *inode = dir->d_inode; 852 struct inode *inode = dir->d_inode;
853 struct ceph_inode_info *ci = ceph_inode(inode);
852 struct ceph_dentry_info *di; 854 struct ceph_dentry_info *di;
853 855
854 BUG_ON(!inode); 856 BUG_ON(!inode);
855 857
856 di = ceph_dentry(dn); 858 di = ceph_dentry(dn);
857 859
858 spin_lock(&inode->i_lock); 860 spin_lock(&ci->i_ceph_lock);
859 if (!ceph_dir_test_complete(inode)) { 861 if (!ceph_dir_test_complete(inode)) {
860 spin_unlock(&inode->i_lock); 862 spin_unlock(&ci->i_ceph_lock);
861 return; 863 return;
862 } 864 }
863 di->offset = ceph_inode(inode)->i_max_offset++; 865 di->offset = ceph_inode(inode)->i_max_offset++;
864 spin_unlock(&inode->i_lock); 866 spin_unlock(&ci->i_ceph_lock);
865 867
866 spin_lock(&dir->d_lock); 868 spin_lock(&dir->d_lock);
867 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); 869 spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1308,7 +1310,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1308 struct ceph_inode_info *ci = ceph_inode(inode); 1310 struct ceph_inode_info *ci = ceph_inode(inode);
1309 int ret = 0; 1311 int ret = 0;
1310 1312
1311 spin_lock(&inode->i_lock); 1313 spin_lock(&ci->i_ceph_lock);
1312 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size); 1314 dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
1313 inode->i_size = size; 1315 inode->i_size = size;
1314 inode->i_blocks = (size + (1 << 9) - 1) >> 9; 1316 inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1318,7 +1320,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
1318 (ci->i_reported_size << 1) < ci->i_max_size) 1320 (ci->i_reported_size << 1) < ci->i_max_size)
1319 ret = 1; 1321 ret = 1;
1320 1322
1321 spin_unlock(&inode->i_lock); 1323 spin_unlock(&ci->i_ceph_lock);
1322 return ret; 1324 return ret;
1323} 1325}
1324 1326
@@ -1376,20 +1378,20 @@ static void ceph_invalidate_work(struct work_struct *work)
1376 u32 orig_gen; 1378 u32 orig_gen;
1377 int check = 0; 1379 int check = 0;
1378 1380
1379 spin_lock(&inode->i_lock); 1381 spin_lock(&ci->i_ceph_lock);
1380 dout("invalidate_pages %p gen %d revoking %d\n", inode, 1382 dout("invalidate_pages %p gen %d revoking %d\n", inode,
1381 ci->i_rdcache_gen, ci->i_rdcache_revoking); 1383 ci->i_rdcache_gen, ci->i_rdcache_revoking);
1382 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 1384 if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
1383 /* nevermind! */ 1385 /* nevermind! */
1384 spin_unlock(&inode->i_lock); 1386 spin_unlock(&ci->i_ceph_lock);
1385 goto out; 1387 goto out;
1386 } 1388 }
1387 orig_gen = ci->i_rdcache_gen; 1389 orig_gen = ci->i_rdcache_gen;
1388 spin_unlock(&inode->i_lock); 1390 spin_unlock(&ci->i_ceph_lock);
1389 1391
1390 truncate_inode_pages(&inode->i_data, 0); 1392 truncate_inode_pages(&inode->i_data, 0);
1391 1393
1392 spin_lock(&inode->i_lock); 1394 spin_lock(&ci->i_ceph_lock);
1393 if (orig_gen == ci->i_rdcache_gen && 1395 if (orig_gen == ci->i_rdcache_gen &&
1394 orig_gen == ci->i_rdcache_revoking) { 1396 orig_gen == ci->i_rdcache_revoking) {
1395 dout("invalidate_pages %p gen %d successful\n", inode, 1397 dout("invalidate_pages %p gen %d successful\n", inode,
@@ -1401,7 +1403,7 @@ static void ceph_invalidate_work(struct work_struct *work)
1401 inode, orig_gen, ci->i_rdcache_gen, 1403 inode, orig_gen, ci->i_rdcache_gen,
1402 ci->i_rdcache_revoking); 1404 ci->i_rdcache_revoking);
1403 } 1405 }
1404 spin_unlock(&inode->i_lock); 1406 spin_unlock(&ci->i_ceph_lock);
1405 1407
1406 if (check) 1408 if (check)
1407 ceph_check_caps(ci, 0, NULL); 1409 ceph_check_caps(ci, 0, NULL);
@@ -1460,10 +1462,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
1460 int wrbuffer_refs, wake = 0; 1462 int wrbuffer_refs, wake = 0;
1461 1463
1462retry: 1464retry:
1463 spin_lock(&inode->i_lock); 1465 spin_lock(&ci->i_ceph_lock);
1464 if (ci->i_truncate_pending == 0) { 1466 if (ci->i_truncate_pending == 0) {
1465 dout("__do_pending_vmtruncate %p none pending\n", inode); 1467 dout("__do_pending_vmtruncate %p none pending\n", inode);
1466 spin_unlock(&inode->i_lock); 1468 spin_unlock(&ci->i_ceph_lock);
1467 return; 1469 return;
1468 } 1470 }
1469 1471
@@ -1474,7 +1476,7 @@ retry:
1474 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { 1476 if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
1475 dout("__do_pending_vmtruncate %p flushing snaps first\n", 1477 dout("__do_pending_vmtruncate %p flushing snaps first\n",
1476 inode); 1478 inode);
1477 spin_unlock(&inode->i_lock); 1479 spin_unlock(&ci->i_ceph_lock);
1478 filemap_write_and_wait_range(&inode->i_data, 0, 1480 filemap_write_and_wait_range(&inode->i_data, 0,
1479 inode->i_sb->s_maxbytes); 1481 inode->i_sb->s_maxbytes);
1480 goto retry; 1482 goto retry;
@@ -1484,15 +1486,15 @@ retry:
1484 wrbuffer_refs = ci->i_wrbuffer_ref; 1486 wrbuffer_refs = ci->i_wrbuffer_ref;
1485 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode, 1487 dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
1486 ci->i_truncate_pending, to); 1488 ci->i_truncate_pending, to);
1487 spin_unlock(&inode->i_lock); 1489 spin_unlock(&ci->i_ceph_lock);
1488 1490
1489 truncate_inode_pages(inode->i_mapping, to); 1491 truncate_inode_pages(inode->i_mapping, to);
1490 1492
1491 spin_lock(&inode->i_lock); 1493 spin_lock(&ci->i_ceph_lock);
1492 ci->i_truncate_pending--; 1494 ci->i_truncate_pending--;
1493 if (ci->i_truncate_pending == 0) 1495 if (ci->i_truncate_pending == 0)
1494 wake = 1; 1496 wake = 1;
1495 spin_unlock(&inode->i_lock); 1497 spin_unlock(&ci->i_ceph_lock);
1496 1498
1497 if (wrbuffer_refs == 0) 1499 if (wrbuffer_refs == 0)
1498 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL); 1500 ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1547,7 +1549,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1547 if (IS_ERR(req)) 1549 if (IS_ERR(req))
1548 return PTR_ERR(req); 1550 return PTR_ERR(req);
1549 1551
1550 spin_lock(&inode->i_lock); 1552 spin_lock(&ci->i_ceph_lock);
1551 issued = __ceph_caps_issued(ci, NULL); 1553 issued = __ceph_caps_issued(ci, NULL);
1552 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); 1554 dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
1553 1555
@@ -1695,7 +1697,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1695 } 1697 }
1696 1698
1697 release &= issued; 1699 release &= issued;
1698 spin_unlock(&inode->i_lock); 1700 spin_unlock(&ci->i_ceph_lock);
1699 1701
1700 if (inode_dirty_flags) 1702 if (inode_dirty_flags)
1701 __mark_inode_dirty(inode, inode_dirty_flags); 1703 __mark_inode_dirty(inode, inode_dirty_flags);
@@ -1717,7 +1719,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
1717 __ceph_do_pending_vmtruncate(inode); 1719 __ceph_do_pending_vmtruncate(inode);
1718 return err; 1720 return err;
1719out: 1721out:
1720 spin_unlock(&inode->i_lock); 1722 spin_unlock(&ci->i_ceph_lock);
1721 ceph_mdsc_put_request(req); 1723 ceph_mdsc_put_request(req);
1722 return err; 1724 return err;
1723} 1725}
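The fs/ceph/inode.c hunks above all follow one pattern: ceph-private inode state is now protected by ci->i_ceph_lock rather than the VFS inode->i_lock, and the paths that must block (the invalidate work, the pending vmtruncate) note what they need under the lock, drop it for the blocking page-cache call, then retake the lock and recheck before acting. Below is a minimal user-space sketch of that drop-and-recheck shape; all names are invented and a pthread mutex stands in for the spinlock.

#include <pthread.h>

struct obj {
        pthread_mutex_t lock;
        int gen;        /* bumped when the cached state is invalidated again */
        int pending;    /* outstanding truncate/invalidate work */
};

static void do_blocking_flush(void)
{
        /* stands in for truncate_inode_pages() or a filemap flush */
}

static void handle_pending(struct obj *o)
{
        int orig_gen;

retry:
        pthread_mutex_lock(&o->lock);
        if (o->pending == 0) {
                pthread_mutex_unlock(&o->lock);
                return;
        }
        orig_gen = o->gen;
        pthread_mutex_unlock(&o->lock);         /* never block while holding it */

        do_blocking_flush();

        pthread_mutex_lock(&o->lock);
        if (orig_gen != o->gen) {               /* raced with a new invalidation */
                pthread_mutex_unlock(&o->lock);
                goto retry;
        }
        o->pending--;
        pthread_mutex_unlock(&o->lock);
}

int main(void)
{
        struct obj o = { PTHREAD_MUTEX_INITIALIZER, 0, 1 };

        handle_pending(&o);
        return o.pending;       /* 0 once the pending work is done */
}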
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 5a14c29cbba6..790914a598dd 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -241,11 +241,11 @@ static long ceph_ioctl_lazyio(struct file *file)
241 struct ceph_inode_info *ci = ceph_inode(inode); 241 struct ceph_inode_info *ci = ceph_inode(inode);
242 242
243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) { 243 if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
244 spin_lock(&inode->i_lock); 244 spin_lock(&ci->i_ceph_lock);
245 ci->i_nr_by_mode[fi->fmode]--; 245 ci->i_nr_by_mode[fi->fmode]--;
246 fi->fmode |= CEPH_FILE_MODE_LAZY; 246 fi->fmode |= CEPH_FILE_MODE_LAZY;
247 ci->i_nr_by_mode[fi->fmode]++; 247 ci->i_nr_by_mode[fi->fmode]++;
248 spin_unlock(&inode->i_lock); 248 spin_unlock(&ci->i_ceph_lock);
249 dout("ioctl_layzio: file %p marked lazy\n", file); 249 dout("ioctl_layzio: file %p marked lazy\n", file);
250 250
251 ceph_check_caps(ci, 0, NULL); 251 ceph_check_caps(ci, 0, NULL);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 264ab701154f..6203d805eb45 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -732,21 +732,21 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
732 } 732 }
733 } 733 }
734 734
735 spin_lock(&inode->i_lock); 735 spin_lock(&ci->i_ceph_lock);
736 cap = NULL; 736 cap = NULL;
737 if (mode == USE_AUTH_MDS) 737 if (mode == USE_AUTH_MDS)
738 cap = ci->i_auth_cap; 738 cap = ci->i_auth_cap;
739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps)) 739 if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); 740 cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
741 if (!cap) { 741 if (!cap) {
742 spin_unlock(&inode->i_lock); 742 spin_unlock(&ci->i_ceph_lock);
743 goto random; 743 goto random;
744 } 744 }
745 mds = cap->session->s_mds; 745 mds = cap->session->s_mds;
746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n", 746 dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
747 inode, ceph_vinop(inode), mds, 747 inode, ceph_vinop(inode), mds,
748 cap == ci->i_auth_cap ? "auth " : "", cap); 748 cap == ci->i_auth_cap ? "auth " : "", cap);
749 spin_unlock(&inode->i_lock); 749 spin_unlock(&ci->i_ceph_lock);
750 return mds; 750 return mds;
751 751
752random: 752random:
@@ -951,7 +951,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
951 951
952 dout("removing cap %p, ci is %p, inode is %p\n", 952 dout("removing cap %p, ci is %p, inode is %p\n",
953 cap, ci, &ci->vfs_inode); 953 cap, ci, &ci->vfs_inode);
954 spin_lock(&inode->i_lock); 954 spin_lock(&ci->i_ceph_lock);
955 __ceph_remove_cap(cap); 955 __ceph_remove_cap(cap);
956 if (!__ceph_is_any_real_caps(ci)) { 956 if (!__ceph_is_any_real_caps(ci)) {
957 struct ceph_mds_client *mdsc = 957 struct ceph_mds_client *mdsc =
@@ -984,7 +984,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
984 } 984 }
985 spin_unlock(&mdsc->cap_dirty_lock); 985 spin_unlock(&mdsc->cap_dirty_lock);
986 } 986 }
987 spin_unlock(&inode->i_lock); 987 spin_unlock(&ci->i_ceph_lock);
988 while (drop--) 988 while (drop--)
989 iput(inode); 989 iput(inode);
990 return 0; 990 return 0;
@@ -1015,10 +1015,10 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
1015 1015
1016 wake_up_all(&ci->i_cap_wq); 1016 wake_up_all(&ci->i_cap_wq);
1017 if (arg) { 1017 if (arg) {
1018 spin_lock(&inode->i_lock); 1018 spin_lock(&ci->i_ceph_lock);
1019 ci->i_wanted_max_size = 0; 1019 ci->i_wanted_max_size = 0;
1020 ci->i_requested_max_size = 0; 1020 ci->i_requested_max_size = 0;
1021 spin_unlock(&inode->i_lock); 1021 spin_unlock(&ci->i_ceph_lock);
1022 } 1022 }
1023 return 0; 1023 return 0;
1024} 1024}
@@ -1151,7 +1151,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1151 if (session->s_trim_caps <= 0) 1151 if (session->s_trim_caps <= 0)
1152 return -1; 1152 return -1;
1153 1153
1154 spin_lock(&inode->i_lock); 1154 spin_lock(&ci->i_ceph_lock);
1155 mine = cap->issued | cap->implemented; 1155 mine = cap->issued | cap->implemented;
1156 used = __ceph_caps_used(ci); 1156 used = __ceph_caps_used(ci);
1157 oissued = __ceph_caps_issued_other(ci, cap); 1157 oissued = __ceph_caps_issued_other(ci, cap);
@@ -1170,7 +1170,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1170 __ceph_remove_cap(cap); 1170 __ceph_remove_cap(cap);
1171 } else { 1171 } else {
1172 /* try to drop referring dentries */ 1172 /* try to drop referring dentries */
1173 spin_unlock(&inode->i_lock); 1173 spin_unlock(&ci->i_ceph_lock);
1174 d_prune_aliases(inode); 1174 d_prune_aliases(inode);
1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n", 1175 dout("trim_caps_cb %p cap %p pruned, count now %d\n",
1176 inode, cap, atomic_read(&inode->i_count)); 1176 inode, cap, atomic_read(&inode->i_count));
@@ -1178,7 +1178,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1178 } 1178 }
1179 1179
1180out: 1180out:
1181 spin_unlock(&inode->i_lock); 1181 spin_unlock(&ci->i_ceph_lock);
1182 return 0; 1182 return 0;
1183} 1183}
1184 1184
@@ -1296,7 +1296,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1296 i_flushing_item); 1296 i_flushing_item);
1297 struct inode *inode = &ci->vfs_inode; 1297 struct inode *inode = &ci->vfs_inode;
1298 1298
1299 spin_lock(&inode->i_lock); 1299 spin_lock(&ci->i_ceph_lock);
1300 if (ci->i_cap_flush_seq <= want_flush_seq) { 1300 if (ci->i_cap_flush_seq <= want_flush_seq) {
1301 dout("check_cap_flush still flushing %p " 1301 dout("check_cap_flush still flushing %p "
1302 "seq %lld <= %lld to mds%d\n", inode, 1302 "seq %lld <= %lld to mds%d\n", inode,
@@ -1304,7 +1304,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
1304 session->s_mds); 1304 session->s_mds);
1305 ret = 0; 1305 ret = 0;
1306 } 1306 }
1307 spin_unlock(&inode->i_lock); 1307 spin_unlock(&ci->i_ceph_lock);
1308 } 1308 }
1309 mutex_unlock(&session->s_mutex); 1309 mutex_unlock(&session->s_mutex);
1310 ceph_put_mds_session(session); 1310 ceph_put_mds_session(session);
@@ -1495,6 +1495,7 @@ retry:
1495 pos, temp); 1495 pos, temp);
1496 } else if (stop_on_nosnap && inode && 1496 } else if (stop_on_nosnap && inode &&
1497 ceph_snap(inode) == CEPH_NOSNAP) { 1497 ceph_snap(inode) == CEPH_NOSNAP) {
1498 spin_unlock(&temp->d_lock);
1498 break; 1499 break;
1499 } else { 1500 } else {
1500 pos -= temp->d_name.len; 1501 pos -= temp->d_name.len;
@@ -2011,10 +2012,10 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
2011 struct ceph_inode_info *ci = ceph_inode(inode); 2012 struct ceph_inode_info *ci = ceph_inode(inode);
2012 2013
2013 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode); 2014 dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
2014 spin_lock(&inode->i_lock); 2015 spin_lock(&ci->i_ceph_lock);
2015 ceph_dir_clear_complete(inode); 2016 ceph_dir_clear_complete(inode);
2016 ci->i_release_count++; 2017 ci->i_release_count++;
2017 spin_unlock(&inode->i_lock); 2018 spin_unlock(&ci->i_ceph_lock);
2018 2019
2019 if (req->r_dentry) 2020 if (req->r_dentry)
2020 ceph_invalidate_dentry_lease(req->r_dentry); 2021 ceph_invalidate_dentry_lease(req->r_dentry);
@@ -2422,7 +2423,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2422 if (err) 2423 if (err)
2423 goto out_free; 2424 goto out_free;
2424 2425
2425 spin_lock(&inode->i_lock); 2426 spin_lock(&ci->i_ceph_lock);
2426 cap->seq = 0; /* reset cap seq */ 2427 cap->seq = 0; /* reset cap seq */
2427 cap->issue_seq = 0; /* and issue_seq */ 2428 cap->issue_seq = 0; /* and issue_seq */
2428 2429
@@ -2445,7 +2446,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
2445 rec.v1.pathbase = cpu_to_le64(pathbase); 2446 rec.v1.pathbase = cpu_to_le64(pathbase);
2446 reclen = sizeof(rec.v1); 2447 reclen = sizeof(rec.v1);
2447 } 2448 }
2448 spin_unlock(&inode->i_lock); 2449 spin_unlock(&ci->i_ceph_lock);
2449 2450
2450 if (recon_state->flock) { 2451 if (recon_state->flock) {
2451 int num_fcntl_locks, num_flock_locks; 2452 int num_fcntl_locks, num_flock_locks;
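One fix in the mds_client.c block above is not part of the lock conversion: the path-building loop gains a spin_unlock(&temp->d_lock) before the break taken when stop_on_nosnap meets CEPH_NOSNAP, so the early exit no longer leaves that dentry's lock held. The same rule in a small user-space sketch (the list type and stop condition are invented): every exit from a loop that locks the current element has to unlock it first.

#include <pthread.h>
#include <stddef.h>

struct node {
        pthread_mutex_t lock;
        int stop_here;          /* plays the role of the CEPH_NOSNAP test */
        size_t name_len;
        struct node *parent;
};

static size_t path_length(struct node *leaf)
{
        struct node *n;
        size_t len = 0;

        for (n = leaf; n; n = n->parent) {
                pthread_mutex_lock(&n->lock);
                if (n->stop_here) {
                        pthread_mutex_unlock(&n->lock); /* the unlock the hunk adds */
                        break;
                }
                len += n->name_len + 1;         /* component plus '/' */
                pthread_mutex_unlock(&n->lock);
        }
        return len;
}

int main(void)
{
        struct node root  = { PTHREAD_MUTEX_INITIALIZER, 1, 0, NULL };
        struct node child = { PTHREAD_MUTEX_INITIALIZER, 0, 3, &root };

        return path_length(&child) == 4 ? 0 : 1;
}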
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4bb239921dbd..a50ca0e39475 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -20,7 +20,7 @@
20 * 20 *
21 * mdsc->snap_rwsem 21 * mdsc->snap_rwsem
22 * 22 *
23 * inode->i_lock 23 * ci->i_ceph_lock
24 * mdsc->snap_flush_lock 24 * mdsc->snap_flush_lock
25 * mdsc->cap_delay_lock 25 * mdsc->cap_delay_lock
26 * 26 *
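The comment above records the lock ordering, with ci->i_ceph_lock taken under mdsc->snap_rwsem and above mdsc->snap_flush_lock and mdsc->cap_delay_lock. Writing the order down matters because deadlocks are avoided only if every path nests the locks the same way; here is a toy user-space illustration with invented names and pthread mutexes standing in for the kernel primitives.

#include <pthread.h>

static pthread_mutex_t outer_lock = PTHREAD_MUTEX_INITIALIZER; /* higher in the order */
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER; /* lower in the order */
static int shared_state;

static void writer_a(void)
{
        pthread_mutex_lock(&outer_lock);        /* always outer first ... */
        pthread_mutex_lock(&inner_lock);        /* ... then inner */
        shared_state++;
        pthread_mutex_unlock(&inner_lock);
        pthread_mutex_unlock(&outer_lock);
}

static void writer_b(void)
{
        pthread_mutex_lock(&outer_lock);        /* same order everywhere, so no ABBA deadlock */
        pthread_mutex_lock(&inner_lock);
        shared_state--;
        pthread_mutex_unlock(&inner_lock);
        pthread_mutex_unlock(&outer_lock);
}

int main(void)
{
        writer_a();
        writer_b();
        return shared_state;    /* 0: both writers ran and balanced out */
}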
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e26437191333..a559c80f127a 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -446,7 +446,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
446 return; 446 return;
447 } 447 }
448 448
449 spin_lock(&inode->i_lock); 449 spin_lock(&ci->i_ceph_lock);
450 used = __ceph_caps_used(ci); 450 used = __ceph_caps_used(ci);
451 dirty = __ceph_caps_dirty(ci); 451 dirty = __ceph_caps_dirty(ci);
452 452
@@ -528,7 +528,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
528 kfree(capsnap); 528 kfree(capsnap);
529 } 529 }
530 530
531 spin_unlock(&inode->i_lock); 531 spin_unlock(&ci->i_ceph_lock);
532} 532}
533 533
534/* 534/*
@@ -537,7 +537,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
537 * 537 *
538 * If capsnap can now be flushed, add to snap_flush list, and return 1. 538 * If capsnap can now be flushed, add to snap_flush list, and return 1.
539 * 539 *
540 * Caller must hold i_lock. 540 * Caller must hold i_ceph_lock.
541 */ 541 */
542int __ceph_finish_cap_snap(struct ceph_inode_info *ci, 542int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
543 struct ceph_cap_snap *capsnap) 543 struct ceph_cap_snap *capsnap)
@@ -739,9 +739,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
739 inode = &ci->vfs_inode; 739 inode = &ci->vfs_inode;
740 ihold(inode); 740 ihold(inode);
741 spin_unlock(&mdsc->snap_flush_lock); 741 spin_unlock(&mdsc->snap_flush_lock);
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743 __ceph_flush_snaps(ci, &session, 0); 743 __ceph_flush_snaps(ci, &session, 0);
744 spin_unlock(&inode->i_lock); 744 spin_unlock(&ci->i_ceph_lock);
745 iput(inode); 745 iput(inode);
746 spin_lock(&mdsc->snap_flush_lock); 746 spin_lock(&mdsc->snap_flush_lock);
747 } 747 }
@@ -847,7 +847,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
847 continue; 847 continue;
848 ci = ceph_inode(inode); 848 ci = ceph_inode(inode);
849 849
850 spin_lock(&inode->i_lock); 850 spin_lock(&ci->i_ceph_lock);
851 if (!ci->i_snap_realm) 851 if (!ci->i_snap_realm)
852 goto skip_inode; 852 goto skip_inode;
853 /* 853 /*
@@ -876,7 +876,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
876 oldrealm = ci->i_snap_realm; 876 oldrealm = ci->i_snap_realm;
877 ci->i_snap_realm = realm; 877 ci->i_snap_realm = realm;
878 spin_unlock(&realm->inodes_with_caps_lock); 878 spin_unlock(&realm->inodes_with_caps_lock);
879 spin_unlock(&inode->i_lock); 879 spin_unlock(&ci->i_ceph_lock);
880 880
881 ceph_get_snap_realm(mdsc, realm); 881 ceph_get_snap_realm(mdsc, realm);
882 ceph_put_snap_realm(mdsc, oldrealm); 882 ceph_put_snap_realm(mdsc, oldrealm);
@@ -885,7 +885,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
885 continue; 885 continue;
886 886
887skip_inode: 887skip_inode:
888 spin_unlock(&inode->i_lock); 888 spin_unlock(&ci->i_ceph_lock);
889 iput(inode); 889 iput(inode);
890 } 890 }
891 891
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 8dc73a594a90..11bd0fc4853f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -341,11 +341,11 @@ out:
341/** 341/**
342 * ceph_show_options - Show mount options in /proc/mounts 342 * ceph_show_options - Show mount options in /proc/mounts
343 * @m: seq_file to write to 343 * @m: seq_file to write to
344 * @mnt: mount descriptor 344 * @root: root of that (sub)tree
345 */ 345 */
346static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) 346static int ceph_show_options(struct seq_file *m, struct dentry *root)
347{ 347{
348 struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb); 348 struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
349 struct ceph_mount_options *fsopt = fsc->mount_options; 349 struct ceph_mount_options *fsopt = fsc->mount_options;
350 struct ceph_options *opt = fsc->client->options; 350 struct ceph_options *opt = fsc->client->options;
351 351
@@ -383,7 +383,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
383 if (fsopt->rsize != CEPH_RSIZE_DEFAULT) 383 if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
384 seq_printf(m, ",rsize=%d", fsopt->rsize); 384 seq_printf(m, ",rsize=%d", fsopt->rsize);
385 if (fsopt->rasize != CEPH_RASIZE_DEFAULT) 385 if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
386 seq_printf(m, ",rasize=%d", fsopt->rsize); 386 seq_printf(m, ",rasize=%d", fsopt->rasize);
387 if (fsopt->congestion_kb != default_congestion_kb()) 387 if (fsopt->congestion_kb != default_congestion_kb())
388 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 388 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
389 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) 389 if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
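Two independent fixes sit in the fs/ceph/super.c hunk: ceph_show_options moves to the dentry-based ->show_options signature and reaches the superblock through root->d_sb, and the rasize line now prints fsopt->rasize instead of repeating fsopt->rsize. The kernel-style sketch below shows the shape of such a callback for a hypothetical filesystem; it is a fragment rather than a standalone program, the myfs names and defaults are invented, and s_fs_info is assumed to hold the filesystem's private superblock data.

static int myfs_show_options(struct seq_file *m, struct dentry *root)
{
        struct myfs_sb_info *sbi = root->d_sb->s_fs_info;

        if (sbi->rsize != MYFS_RSIZE_DEFAULT)
                seq_printf(m, ",rsize=%d", sbi->rsize);
        if (sbi->rasize != MYFS_RASIZE_DEFAULT)
                seq_printf(m, ",rasize=%d", sbi->rasize);       /* print rasize, not rsize */
        return 0;
}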
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 01bf189e08a9..cb3652b37271 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -136,7 +136,7 @@ struct ceph_cap_snap {
136 int issued, dirty; 136 int issued, dirty;
137 struct ceph_snap_context *context; 137 struct ceph_snap_context *context;
138 138
139 mode_t mode; 139 umode_t mode;
140 uid_t uid; 140 uid_t uid;
141 gid_t gid; 141 gid_t gid;
142 142
@@ -220,7 +220,7 @@ struct ceph_dentry_info {
220 * The locking for D_COMPLETE is a bit odd: 220 * The locking for D_COMPLETE is a bit odd:
221 * - we can clear it at almost any time (see ceph_d_prune) 221 * - we can clear it at almost any time (see ceph_d_prune)
222 * - it is only meaningful if: 222 * - it is only meaningful if:
223 * - we hold dir inode i_lock 223 * - we hold dir inode i_ceph_lock
224 * - we hold dir FILE_SHARED caps 224 * - we hold dir FILE_SHARED caps
225 * - the dentry D_COMPLETE is set 225 * - the dentry D_COMPLETE is set
226 */ 226 */
@@ -250,6 +250,8 @@ struct ceph_inode_xattrs_info {
250struct ceph_inode_info { 250struct ceph_inode_info {
251 struct ceph_vino i_vino; /* ceph ino + snap */ 251 struct ceph_vino i_vino; /* ceph ino + snap */
252 252
253 spinlock_t i_ceph_lock;
254
253 u64 i_version; 255 u64 i_version;
254 u32 i_time_warp_seq; 256 u32 i_time_warp_seq;
255 257
@@ -271,7 +273,7 @@ struct ceph_inode_info {
271 273
272 struct ceph_inode_xattrs_info i_xattrs; 274 struct ceph_inode_xattrs_info i_xattrs;
273 275
274 /* capabilities. protected _both_ by i_lock and cap->session's 276 /* capabilities. protected _both_ by i_ceph_lock and cap->session's
275 * s_mutex. */ 277 * s_mutex. */
276 struct rb_root i_caps; /* cap list */ 278 struct rb_root i_caps; /* cap list */
277 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */ 279 struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
@@ -437,18 +439,18 @@ static inline void ceph_i_clear(struct inode *inode, unsigned mask)
437{ 439{
438 struct ceph_inode_info *ci = ceph_inode(inode); 440 struct ceph_inode_info *ci = ceph_inode(inode);
439 441
440 spin_lock(&inode->i_lock); 442 spin_lock(&ci->i_ceph_lock);
441 ci->i_ceph_flags &= ~mask; 443 ci->i_ceph_flags &= ~mask;
442 spin_unlock(&inode->i_lock); 444 spin_unlock(&ci->i_ceph_lock);
443} 445}
444 446
445static inline void ceph_i_set(struct inode *inode, unsigned mask) 447static inline void ceph_i_set(struct inode *inode, unsigned mask)
446{ 448{
447 struct ceph_inode_info *ci = ceph_inode(inode); 449 struct ceph_inode_info *ci = ceph_inode(inode);
448 450
449 spin_lock(&inode->i_lock); 451 spin_lock(&ci->i_ceph_lock);
450 ci->i_ceph_flags |= mask; 452 ci->i_ceph_flags |= mask;
451 spin_unlock(&inode->i_lock); 453 spin_unlock(&ci->i_ceph_lock);
452} 454}
453 455
454static inline bool ceph_i_test(struct inode *inode, unsigned mask) 456static inline bool ceph_i_test(struct inode *inode, unsigned mask)
@@ -456,9 +458,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
456 struct ceph_inode_info *ci = ceph_inode(inode); 458 struct ceph_inode_info *ci = ceph_inode(inode);
457 bool r; 459 bool r;
458 460
459 spin_lock(&inode->i_lock); 461 spin_lock(&ci->i_ceph_lock);
460 r = (ci->i_ceph_flags & mask) == mask; 462 r = (ci->i_ceph_flags & mask) == mask;
461 spin_unlock(&inode->i_lock); 463 spin_unlock(&ci->i_ceph_lock);
462 return r; 464 return r;
463} 465}
464 466
@@ -508,9 +510,9 @@ extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
508static inline int ceph_caps_issued(struct ceph_inode_info *ci) 510static inline int ceph_caps_issued(struct ceph_inode_info *ci)
509{ 511{
510 int issued; 512 int issued;
511 spin_lock(&ci->vfs_inode.i_lock); 513 spin_lock(&ci->i_ceph_lock);
512 issued = __ceph_caps_issued(ci, NULL); 514 issued = __ceph_caps_issued(ci, NULL);
513 spin_unlock(&ci->vfs_inode.i_lock); 515 spin_unlock(&ci->i_ceph_lock);
514 return issued; 516 return issued;
515} 517}
516 518
@@ -518,9 +520,9 @@ static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
518 int touch) 520 int touch)
519{ 521{
520 int r; 522 int r;
521 spin_lock(&ci->vfs_inode.i_lock); 523 spin_lock(&ci->i_ceph_lock);
522 r = __ceph_caps_issued_mask(ci, mask, touch); 524 r = __ceph_caps_issued_mask(ci, mask, touch);
523 spin_unlock(&ci->vfs_inode.i_lock); 525 spin_unlock(&ci->i_ceph_lock);
524 return r; 526 return r;
525} 527}
526 528
@@ -743,10 +745,9 @@ extern int ceph_add_cap(struct inode *inode,
743extern void __ceph_remove_cap(struct ceph_cap *cap); 745extern void __ceph_remove_cap(struct ceph_cap *cap);
744static inline void ceph_remove_cap(struct ceph_cap *cap) 746static inline void ceph_remove_cap(struct ceph_cap *cap)
745{ 747{
746 struct inode *inode = &cap->ci->vfs_inode; 748 spin_lock(&cap->ci->i_ceph_lock);
747 spin_lock(&inode->i_lock);
748 __ceph_remove_cap(cap); 749 __ceph_remove_cap(cap);
749 spin_unlock(&inode->i_lock); 750 spin_unlock(&cap->ci->i_ceph_lock);
750} 751}
751extern void ceph_put_cap(struct ceph_mds_client *mdsc, 752extern void ceph_put_cap(struct ceph_mds_client *mdsc,
752 struct ceph_cap *cap); 753 struct ceph_cap *cap);
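fs/ceph/super.h is where the new lock is introduced: ceph_inode_info gains spinlock_t i_ceph_lock, the comments note that cap state is protected by i_ceph_lock together with the session's s_mutex, and the inline helpers (ceph_i_set, ceph_i_clear, ceph_i_test, ceph_caps_issued, ceph_remove_cap) switch over to it. A user-space sketch of the same shape, with a pthread mutex standing in for the spinlock and all names invented:

#include <pthread.h>
#include <stdbool.h>

struct my_inode_info {
        pthread_mutex_t i_private_lock;         /* plays the role of i_ceph_lock */
        unsigned int i_flags;                   /* state the private lock protects */
};

static void my_i_set(struct my_inode_info *ci, unsigned int mask)
{
        pthread_mutex_lock(&ci->i_private_lock);
        ci->i_flags |= mask;
        pthread_mutex_unlock(&ci->i_private_lock);
}

static void my_i_clear(struct my_inode_info *ci, unsigned int mask)
{
        pthread_mutex_lock(&ci->i_private_lock);
        ci->i_flags &= ~mask;
        pthread_mutex_unlock(&ci->i_private_lock);
}

static bool my_i_test(struct my_inode_info *ci, unsigned int mask)
{
        bool r;

        pthread_mutex_lock(&ci->i_private_lock);
        r = (ci->i_flags & mask) == mask;
        pthread_mutex_unlock(&ci->i_private_lock);
        return r;
}

int main(void)
{
        struct my_inode_info ci = { PTHREAD_MUTEX_INITIALIZER, 0 };

        my_i_set(&ci, 0x1);
        my_i_clear(&ci, 0x1);
        return my_i_test(&ci, 0x1) ? 1 : 0;     /* 0: flag was set then cleared */
}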
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 96c6739a0280..a5e36e4488a7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -343,8 +343,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
343} 343}
344 344
345static int __build_xattrs(struct inode *inode) 345static int __build_xattrs(struct inode *inode)
346 __releases(inode->i_lock) 346 __releases(ci->i_ceph_lock)
347 __acquires(inode->i_lock) 347 __acquires(ci->i_ceph_lock)
348{ 348{
349 u32 namelen; 349 u32 namelen;
350 u32 numattr = 0; 350 u32 numattr = 0;
@@ -372,7 +372,7 @@ start:
372 end = p + ci->i_xattrs.blob->vec.iov_len; 372 end = p + ci->i_xattrs.blob->vec.iov_len;
373 ceph_decode_32_safe(&p, end, numattr, bad); 373 ceph_decode_32_safe(&p, end, numattr, bad);
374 xattr_version = ci->i_xattrs.version; 374 xattr_version = ci->i_xattrs.version;
375 spin_unlock(&inode->i_lock); 375 spin_unlock(&ci->i_ceph_lock);
376 376
377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *), 377 xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
378 GFP_NOFS); 378 GFP_NOFS);
@@ -387,7 +387,7 @@ start:
387 goto bad_lock; 387 goto bad_lock;
388 } 388 }
389 389
390 spin_lock(&inode->i_lock); 390 spin_lock(&ci->i_ceph_lock);
391 if (ci->i_xattrs.version != xattr_version) { 391 if (ci->i_xattrs.version != xattr_version) {
392 /* lost a race, retry */ 392 /* lost a race, retry */
393 for (i = 0; i < numattr; i++) 393 for (i = 0; i < numattr; i++)
@@ -418,7 +418,7 @@ start:
418 418
419 return err; 419 return err;
420bad_lock: 420bad_lock:
421 spin_lock(&inode->i_lock); 421 spin_lock(&ci->i_ceph_lock);
422bad: 422bad:
423 if (xattrs) { 423 if (xattrs) {
424 for (i = 0; i < numattr; i++) 424 for (i = 0; i < numattr; i++)
@@ -512,7 +512,7 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
512 if (vxattrs) 512 if (vxattrs)
513 vxattr = ceph_match_vxattr(vxattrs, name); 513 vxattr = ceph_match_vxattr(vxattrs, name);
514 514
515 spin_lock(&inode->i_lock); 515 spin_lock(&ci->i_ceph_lock);
516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode, 516 dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
517 ci->i_xattrs.version, ci->i_xattrs.index_version); 517 ci->i_xattrs.version, ci->i_xattrs.index_version);
518 518
@@ -520,14 +520,14 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 520 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
521 goto get_xattr; 521 goto get_xattr;
522 } else { 522 } else {
523 spin_unlock(&inode->i_lock); 523 spin_unlock(&ci->i_ceph_lock);
524 /* get xattrs from mds (if we don't already have them) */ 524 /* get xattrs from mds (if we don't already have them) */
525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 525 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
526 if (err) 526 if (err)
527 return err; 527 return err;
528 } 528 }
529 529
530 spin_lock(&inode->i_lock); 530 spin_lock(&ci->i_ceph_lock);
531 531
532 if (vxattr && vxattr->readonly) { 532 if (vxattr && vxattr->readonly) {
533 err = vxattr->getxattr_cb(ci, value, size); 533 err = vxattr->getxattr_cb(ci, value, size);
@@ -558,7 +558,7 @@ get_xattr:
558 memcpy(value, xattr->val, xattr->val_len); 558 memcpy(value, xattr->val, xattr->val_len);
559 559
560out: 560out:
561 spin_unlock(&inode->i_lock); 561 spin_unlock(&ci->i_ceph_lock);
562 return err; 562 return err;
563} 563}
564 564
@@ -573,7 +573,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
573 u32 len; 573 u32 len;
574 int i; 574 int i;
575 575
576 spin_lock(&inode->i_lock); 576 spin_lock(&ci->i_ceph_lock);
577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode, 577 dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
578 ci->i_xattrs.version, ci->i_xattrs.index_version); 578 ci->i_xattrs.version, ci->i_xattrs.index_version);
579 579
@@ -581,13 +581,13 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { 581 (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
582 goto list_xattr; 582 goto list_xattr;
583 } else { 583 } else {
584 spin_unlock(&inode->i_lock); 584 spin_unlock(&ci->i_ceph_lock);
585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR); 585 err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
586 if (err) 586 if (err)
587 return err; 587 return err;
588 } 588 }
589 589
590 spin_lock(&inode->i_lock); 590 spin_lock(&ci->i_ceph_lock);
591 591
592 err = __build_xattrs(inode); 592 err = __build_xattrs(inode);
593 if (err < 0) 593 if (err < 0)
@@ -619,7 +619,7 @@ list_xattr:
619 } 619 }
620 620
621out: 621out:
622 spin_unlock(&inode->i_lock); 622 spin_unlock(&ci->i_ceph_lock);
623 return err; 623 return err;
624} 624}
625 625
@@ -739,7 +739,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
739 if (!xattr) 739 if (!xattr)
740 goto out; 740 goto out;
741 741
742 spin_lock(&inode->i_lock); 742 spin_lock(&ci->i_ceph_lock);
743retry: 743retry:
744 issued = __ceph_caps_issued(ci, NULL); 744 issued = __ceph_caps_issued(ci, NULL);
745 if (!(issued & CEPH_CAP_XATTR_EXCL)) 745 if (!(issued & CEPH_CAP_XATTR_EXCL))
@@ -752,12 +752,12 @@ retry:
752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) { 752 required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
753 struct ceph_buffer *blob = NULL; 753 struct ceph_buffer *blob = NULL;
754 754
755 spin_unlock(&inode->i_lock); 755 spin_unlock(&ci->i_ceph_lock);
756 dout(" preaallocating new blob size=%d\n", required_blob_size); 756 dout(" preaallocating new blob size=%d\n", required_blob_size);
757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS); 757 blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
758 if (!blob) 758 if (!blob)
759 goto out; 759 goto out;
760 spin_lock(&inode->i_lock); 760 spin_lock(&ci->i_ceph_lock);
761 if (ci->i_xattrs.prealloc_blob) 761 if (ci->i_xattrs.prealloc_blob)
762 ceph_buffer_put(ci->i_xattrs.prealloc_blob); 762 ceph_buffer_put(ci->i_xattrs.prealloc_blob);
763 ci->i_xattrs.prealloc_blob = blob; 763 ci->i_xattrs.prealloc_blob = blob;
@@ -770,13 +770,13 @@ retry:
770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL); 770 dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
771 ci->i_xattrs.dirty = true; 771 ci->i_xattrs.dirty = true;
772 inode->i_ctime = CURRENT_TIME; 772 inode->i_ctime = CURRENT_TIME;
773 spin_unlock(&inode->i_lock); 773 spin_unlock(&ci->i_ceph_lock);
774 if (dirty) 774 if (dirty)
775 __mark_inode_dirty(inode, dirty); 775 __mark_inode_dirty(inode, dirty);
776 return err; 776 return err;
777 777
778do_sync: 778do_sync:
779 spin_unlock(&inode->i_lock); 779 spin_unlock(&ci->i_ceph_lock);
780 err = ceph_sync_setxattr(dentry, name, value, size, flags); 780 err = ceph_sync_setxattr(dentry, name, value, size, flags);
781out: 781out:
782 kfree(newname); 782 kfree(newname);
@@ -833,7 +833,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
833 return -EOPNOTSUPP; 833 return -EOPNOTSUPP;
834 } 834 }
835 835
836 spin_lock(&inode->i_lock); 836 spin_lock(&ci->i_ceph_lock);
837 __build_xattrs(inode); 837 __build_xattrs(inode);
838 issued = __ceph_caps_issued(ci, NULL); 838 issued = __ceph_caps_issued(ci, NULL);
839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued)); 839 dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -846,12 +846,12 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
846 ci->i_xattrs.dirty = true; 846 ci->i_xattrs.dirty = true;
847 inode->i_ctime = CURRENT_TIME; 847 inode->i_ctime = CURRENT_TIME;
848 848
849 spin_unlock(&inode->i_lock); 849 spin_unlock(&ci->i_ceph_lock);
850 if (dirty) 850 if (dirty)
851 __mark_inode_dirty(inode, dirty); 851 __mark_inode_dirty(inode, dirty);
852 return err; 852 return err;
853do_sync: 853do_sync:
854 spin_unlock(&inode->i_lock); 854 spin_unlock(&ci->i_ceph_lock);
855 err = ceph_send_removexattr(dentry, name); 855 err = ceph_send_removexattr(dentry, name);
856 return err; 856 return err;
857} 857}
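The xattr hunks keep their existing structure, remembering i_xattrs.version or the needed blob size under the lock, dropping the lock to allocate with GFP_NOFS, then retaking it and retrying if the state moved underneath; only the lock and the sparse __releases/__acquires annotations are retargeted to ci->i_ceph_lock. A user-space sketch of that allocate-outside-the-lock pattern, with invented names and malloc standing in for the kernel allocation:

#include <pthread.h>
#include <stdlib.h>

struct xcache {
        pthread_mutex_t lock;
        unsigned long version;          /* bumped whenever the cached data changes */
        size_t need;                    /* bytes required, maintained under the lock */
        void *buf;
        size_t buf_len;
};

static int ensure_buffer(struct xcache *xc)
{
        unsigned long ver;
        size_t want;
        void *p;

        pthread_mutex_lock(&xc->lock);
retry:
        if (xc->buf_len < xc->need) {
                ver = xc->version;
                want = xc->need;

                pthread_mutex_unlock(&xc->lock);        /* never allocate under the lock */
                p = malloc(want);
                if (!p)
                        return -1;
                pthread_mutex_lock(&xc->lock);
                if (ver != xc->version) {               /* lost a race, re-evaluate */
                        free(p);
                        goto retry;
                }
                free(xc->buf);
                xc->buf = p;
                xc->buf_len = want;
        }
        pthread_mutex_unlock(&xc->lock);
        return 0;
}

int main(void)
{
        struct xcache xc = { PTHREAD_MUTEX_INITIALIZER, 1, 128, NULL, 0 };

        return ensure_buffer(&xc);
}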
diff --git a/fs/char_dev.c b/fs/char_dev.c
index dca9e5e0f73b..3f152b92a94a 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -272,7 +272,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
272 cd = __register_chrdev_region(major, baseminor, count, name); 272 cd = __register_chrdev_region(major, baseminor, count, name);
273 if (IS_ERR(cd)) 273 if (IS_ERR(cd))
274 return PTR_ERR(cd); 274 return PTR_ERR(cd);
275 275
276 cdev = cdev_alloc(); 276 cdev = cdev_alloc();
277 if (!cdev) 277 if (!cdev)
278 goto out2; 278 goto out2;
@@ -280,7 +280,7 @@ int __register_chrdev(unsigned int major, unsigned int baseminor,
280 cdev->owner = fops->owner; 280 cdev->owner = fops->owner;
281 cdev->ops = fops; 281 cdev->ops = fops;
282 kobject_set_name(&cdev->kobj, "%s", name); 282 kobject_set_name(&cdev->kobj, "%s", name);
283 283
284 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); 284 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
285 if (err) 285 if (err)
286 goto out; 286 goto out;
@@ -405,7 +405,7 @@ static int chrdev_open(struct inode *inode, struct file *filp)
405 goto out_cdev_put; 405 goto out_cdev_put;
406 406
407 if (filp->f_op->open) { 407 if (filp->f_op->open) {
408 ret = filp->f_op->open(inode,filp); 408 ret = filp->f_op->open(inode, filp);
409 if (ret) 409 if (ret)
410 goto out_cdev_put; 410 goto out_cdev_put;
411 } 411 }
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 500d65859279..c865bfdfe819 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -59,8 +59,8 @@ struct cifs_sb_info {
59 gid_t mnt_gid; 59 gid_t mnt_gid;
60 uid_t mnt_backupuid; 60 uid_t mnt_backupuid;
61 gid_t mnt_backupgid; 61 gid_t mnt_backupgid;
62 mode_t mnt_file_mode; 62 umode_t mnt_file_mode;
63 mode_t mnt_dir_mode; 63 umode_t mnt_dir_mode;
64 unsigned int mnt_cifs_flags; 64 unsigned int mnt_cifs_flags;
65 char *mountdata; /* options received at mount time or via DFS refs */ 65 char *mountdata; /* options received at mount time or via DFS refs */
66 struct backing_dev_info bdi; 66 struct backing_dev_info bdi;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 8f1fe324162b..b1fd382d1952 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -343,9 +343,9 @@ cifs_show_security(struct seq_file *s, struct TCP_Server_Info *server)
343 * ones are. 343 * ones are.
344 */ 344 */
345static int 345static int
346cifs_show_options(struct seq_file *s, struct vfsmount *m) 346cifs_show_options(struct seq_file *s, struct dentry *root)
347{ 347{
348 struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb); 348 struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
349 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); 349 struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
350 struct sockaddr *srcaddr; 350 struct sockaddr *srcaddr;
351 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; 351 srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
@@ -393,7 +393,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
393 cifs_show_address(s, tcon->ses->server); 393 cifs_show_address(s, tcon->ses->server);
394 394
395 if (!tcon->unix_ext) 395 if (!tcon->unix_ext)
396 seq_printf(s, ",file_mode=0%o,dir_mode=0%o", 396 seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho",
397 cifs_sb->mnt_file_mode, 397 cifs_sb->mnt_file_mode,
398 cifs_sb->mnt_dir_mode); 398 cifs_sb->mnt_dir_mode);
399 if (tcon->seal) 399 if (tcon->seal)
@@ -430,7 +430,7 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
430 seq_printf(s, ",cifsacl"); 430 seq_printf(s, ",cifsacl");
431 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) 431 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
432 seq_printf(s, ",dynperm"); 432 seq_printf(s, ",dynperm");
433 if (m->mnt_sb->s_flags & MS_POSIXACL) 433 if (root->d_sb->s_flags & MS_POSIXACL)
434 seq_printf(s, ",acl"); 434 seq_printf(s, ",acl");
435 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) 435 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
436 seq_printf(s, ",mfsymlinks"); 436 seq_printf(s, ",mfsymlinks");
@@ -488,7 +488,7 @@ static void cifs_umount_begin(struct super_block *sb)
488} 488}
489 489
490#ifdef CONFIG_CIFS_STATS2 490#ifdef CONFIG_CIFS_STATS2
491static int cifs_show_stats(struct seq_file *s, struct vfsmount *mnt) 491static int cifs_show_stats(struct seq_file *s, struct dentry *root)
492{ 492{
493 /* BB FIXME */ 493 /* BB FIXME */
494 return 0; 494 return 0;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 30ff56005d8f..fe5ecf1b422a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -44,14 +44,14 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf;
44/* Functions related to inodes */ 44/* Functions related to inodes */
45extern const struct inode_operations cifs_dir_inode_ops; 45extern const struct inode_operations cifs_dir_inode_ops;
46extern struct inode *cifs_root_iget(struct super_block *); 46extern struct inode *cifs_root_iget(struct super_block *);
47extern int cifs_create(struct inode *, struct dentry *, int, 47extern int cifs_create(struct inode *, struct dentry *, umode_t,
48 struct nameidata *); 48 struct nameidata *);
49extern struct dentry *cifs_lookup(struct inode *, struct dentry *, 49extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
50 struct nameidata *); 50 struct nameidata *);
51extern int cifs_unlink(struct inode *dir, struct dentry *dentry); 51extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
52extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); 52extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
53extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); 53extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
54extern int cifs_mkdir(struct inode *, struct dentry *, int); 54extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
55extern int cifs_rmdir(struct inode *, struct dentry *); 55extern int cifs_rmdir(struct inode *, struct dentry *);
56extern int cifs_rename(struct inode *, struct dentry *, struct inode *, 56extern int cifs_rename(struct inode *, struct dentry *, struct inode *,
57 struct dentry *); 57 struct dentry *);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 8238aa13e01c..ba53c1c6c6cc 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -169,8 +169,8 @@ struct smb_vol {
169 gid_t linux_gid; 169 gid_t linux_gid;
170 uid_t backupuid; 170 uid_t backupuid;
171 gid_t backupgid; 171 gid_t backupgid;
172 mode_t file_mode; 172 umode_t file_mode;
173 mode_t dir_mode; 173 umode_t dir_mode;
174 unsigned secFlg; 174 unsigned secFlg;
175 bool retry:1; 175 bool retry:1;
176 bool intr:1; 176 bool intr:1;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 8cd4b52d4217..4666780f315d 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -282,7 +282,7 @@ static int coalesce_t2(struct smb_hdr *psecond, struct smb_hdr *pTargetSMB)
282 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length); 282 byte_count = be32_to_cpu(pTargetSMB->smb_buf_length);
283 byte_count += total_in_buf2; 283 byte_count += total_in_buf2;
284 /* don't allow buffer to overflow */ 284 /* don't allow buffer to overflow */
285 if (byte_count > CIFSMaxBufSize) 285 if (byte_count > CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4)
286 return -ENOBUFS; 286 return -ENOBUFS;
287 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count); 287 pTargetSMB->smb_buf_length = cpu_to_be32(byte_count);
288 288
@@ -2122,7 +2122,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
2122 warned_on_ntlm = true; 2122 warned_on_ntlm = true;
2123 cERROR(1, "default security mechanism requested. The default " 2123 cERROR(1, "default security mechanism requested. The default "
2124 "security mechanism will be upgraded from ntlm to " 2124 "security mechanism will be upgraded from ntlm to "
2125 "ntlmv2 in kernel release 3.2"); 2125 "ntlmv2 in kernel release 3.3");
2126 } 2126 }
2127 ses->overrideSecFlg = volume_info->secFlg; 2127 ses->overrideSecFlg = volume_info->secFlg;
2128 2128
@@ -2819,7 +2819,7 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
2819 cifs_sb->mnt_backupgid = pvolume_info->backupgid; 2819 cifs_sb->mnt_backupgid = pvolume_info->backupgid;
2820 cifs_sb->mnt_file_mode = pvolume_info->file_mode; 2820 cifs_sb->mnt_file_mode = pvolume_info->file_mode;
2821 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; 2821 cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
2822 cFYI(1, "file mode: 0x%x dir mode: 0x%x", 2822 cFYI(1, "file mode: 0x%hx dir mode: 0x%hx",
2823 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); 2823 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
2824 2824
2825 cifs_sb->actimeo = pvolume_info->actimeo; 2825 cifs_sb->actimeo = pvolume_info->actimeo;
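The coalesce_t2 change above relaxes the overflow check from CIFSMaxBufSize to CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4, bounding the copy against the target SMB buffer's real capacity rather than the smaller nominal limit. The general shape, as a small user-space sketch with invented constants: compare the combined length against the destination's actual capacity before the memcpy.

#include <errno.h>
#include <string.h>

#define BODY_MAX 4096                           /* nominal payload limit */
#define HDR_ROOM 64                             /* extra header room the buffer really has */
#define BUF_CAP  (BODY_MAX + HDR_ROOM)          /* true size of the allocation */

struct msg {
        size_t len;
        unsigned char data[BUF_CAP];
};

static int coalesce(struct msg *dst, const unsigned char *frag, size_t frag_len)
{
        if (dst->len + frag_len > BUF_CAP)      /* bound against the real capacity */
                return -ENOBUFS;
        memcpy(dst->data + dst->len, frag, frag_len);
        dst->len += frag_len;
        return 0;
}

int main(void)
{
        static struct msg m;
        unsigned char frag[100] = { 0 };

        return coalesce(&m, frag, sizeof(frag));
}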
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index d7eeb9d3ed6f..df8fecb5b993 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -136,7 +136,7 @@ cifs_bp_rename_retry:
136/* Inode operations in similar order to how they appear in Linux file fs.h */ 136/* Inode operations in similar order to how they appear in Linux file fs.h */
137 137
138int 138int
139cifs_create(struct inode *inode, struct dentry *direntry, int mode, 139cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
140 struct nameidata *nd) 140 struct nameidata *nd)
141{ 141{
142 int rc = -ENOENT; 142 int rc = -ENOENT;
@@ -355,7 +355,7 @@ cifs_create_out:
355 return rc; 355 return rc;
356} 356}
357 357
358int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, 358int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
359 dev_t device_number) 359 dev_t device_number)
360{ 360{
361 int rc = -EPERM; 361 int rc = -EPERM;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index e851d5b8931e..a5f54b7d9822 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1264,7 +1264,7 @@ unlink_out:
1264 return rc; 1264 return rc;
1265} 1265}
1266 1266
1267int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) 1267int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
1268{ 1268{
1269 int rc = 0, tmprc; 1269 int rc = 0, tmprc;
1270 int xid; 1270 int xid;
@@ -1275,7 +1275,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode)
1275 struct inode *newinode = NULL; 1275 struct inode *newinode = NULL;
1276 struct cifs_fattr fattr; 1276 struct cifs_fattr fattr;
1277 1277
1278 cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode); 1278 cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode);
1279 1279
1280 cifs_sb = CIFS_SB(inode->i_sb); 1280 cifs_sb = CIFS_SB(inode->i_sb);
1281 tlink = cifs_sb_tlink(cifs_sb); 1281 tlink = cifs_sb_tlink(cifs_sb);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 28e7e135cfab..83d2fd8ec24b 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -30,14 +30,14 @@
30#include "coda_int.h" 30#include "coda_int.h"
31 31
32/* dir inode-ops */ 32/* dir inode-ops */
33static int coda_create(struct inode *dir, struct dentry *new, int mode, struct nameidata *nd); 33static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd);
34static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); 34static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd);
35static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, 35static int coda_link(struct dentry *old_dentry, struct inode *dir_inode,
36 struct dentry *entry); 36 struct dentry *entry);
37static int coda_unlink(struct inode *dir_inode, struct dentry *entry); 37static int coda_unlink(struct inode *dir_inode, struct dentry *entry);
38static int coda_symlink(struct inode *dir_inode, struct dentry *entry, 38static int coda_symlink(struct inode *dir_inode, struct dentry *entry,
39 const char *symname); 39 const char *symname);
40static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, int mode); 40static int coda_mkdir(struct inode *dir_inode, struct dentry *entry, umode_t mode);
41static int coda_rmdir(struct inode *dir_inode, struct dentry *entry); 41static int coda_rmdir(struct inode *dir_inode, struct dentry *entry);
42static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, 42static int coda_rename(struct inode *old_inode, struct dentry *old_dentry,
43 struct inode *new_inode, struct dentry *new_dentry); 43 struct inode *new_inode, struct dentry *new_dentry);
@@ -191,7 +191,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
191} 191}
192 192
193/* creation routines: create, mknod, mkdir, link, symlink */ 193/* creation routines: create, mknod, mkdir, link, symlink */
194static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd) 194static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd)
195{ 195{
196 int error; 196 int error;
197 const char *name=de->d_name.name; 197 const char *name=de->d_name.name;
@@ -223,7 +223,7 @@ err_out:
223 return error; 223 return error;
224} 224}
225 225
226static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) 226static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
227{ 227{
228 struct inode *inode; 228 struct inode *inode;
229 struct coda_vattr attrs; 229 struct coda_vattr attrs;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 871b27715465..1c08a8cd673a 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,6 @@ static struct inode *coda_alloc_inode(struct super_block *sb)
58static void coda_i_callback(struct rcu_head *head) 58static void coda_i_callback(struct rcu_head *head)
59{ 59{
60 struct inode *inode = container_of(head, struct inode, i_rcu); 60 struct inode *inode = container_of(head, struct inode, i_rcu);
61 INIT_LIST_HEAD(&inode->i_dentry);
62 kmem_cache_free(coda_inode_cachep, ITOC(inode)); 61 kmem_cache_free(coda_inode_cachep, ITOC(inode));
63} 62}
64 63
diff --git a/fs/compat.c b/fs/compat.c
index c98787536bb8..fa9d721ecfee 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -342,16 +342,9 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
342 */ 342 */
343asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) 343asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
344{ 344{
345 struct super_block *sb;
346 struct compat_ustat tmp; 345 struct compat_ustat tmp;
347 struct kstatfs sbuf; 346 struct kstatfs sbuf;
348 int err; 347 int err = vfs_ustat(new_decode_dev(dev), &sbuf);
349
350 sb = user_get_super(new_decode_dev(dev));
351 if (!sb)
352 return -EINVAL;
353 err = statfs_by_dentry(sb->s_root, &sbuf);
354 drop_super(sb);
355 if (err) 348 if (err)
356 return err; 349 return err;
357 350
@@ -1288,7 +1281,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
1288 * O_LARGEFILE flag. 1281 * O_LARGEFILE flag.
1289 */ 1282 */
1290asmlinkage long 1283asmlinkage long
1291compat_sys_open(const char __user *filename, int flags, int mode) 1284compat_sys_open(const char __user *filename, int flags, umode_t mode)
1292{ 1285{
1293 return do_sys_open(AT_FDCWD, filename, flags, mode); 1286 return do_sys_open(AT_FDCWD, filename, flags, mode);
1294} 1287}
@@ -1298,7 +1291,7 @@ compat_sys_open(const char __user *filename, int flags, int mode)
1298 * O_LARGEFILE flag. 1291 * O_LARGEFILE flag.
1299 */ 1292 */
1300asmlinkage long 1293asmlinkage long
1301compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, int mode) 1294compat_sys_openat(unsigned int dfd, const char __user *filename, int flags, umode_t mode)
1302{ 1295{
1303 return do_sys_open(dfd, filename, flags, mode); 1296 return do_sys_open(dfd, filename, flags, mode);
1304} 1297}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 51352de88ef1..a10e428b32b4 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1506,35 +1506,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
1506 return -ENOIOCTLCMD; 1506 return -ENOIOCTLCMD;
1507} 1507}
1508 1508
1509static void compat_ioctl_error(struct file *filp, unsigned int fd,
1510 unsigned int cmd, unsigned long arg)
1511{
1512 char buf[10];
1513 char *fn = "?";
1514 char *path;
1515
1516 /* find the name of the device. */
1517 path = (char *)__get_free_page(GFP_KERNEL);
1518 if (path) {
1519 fn = d_path(&filp->f_path, path, PAGE_SIZE);
1520 if (IS_ERR(fn))
1521 fn = "?";
1522 }
1523
1524 sprintf(buf,"'%c'", (cmd>>_IOC_TYPESHIFT) & _IOC_TYPEMASK);
1525 if (!isprint(buf[1]))
1526 sprintf(buf, "%02x", buf[1]);
1527 compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
1528 "cmd(%08x){t:%s;sz:%u} arg(%08x) on %s\n",
1529 current->comm, current->pid,
1530 (int)fd, (unsigned int)cmd, buf,
1531 (cmd >> _IOC_SIZESHIFT) & _IOC_SIZEMASK,
1532 (unsigned int)arg, fn);
1533
1534 if (path)
1535 free_page((unsigned long)path);
1536}
1537
1538static int compat_ioctl_check_table(unsigned int xcmd) 1509static int compat_ioctl_check_table(unsigned int xcmd)
1539{ 1510{
1540 int i; 1511 int i;
@@ -1621,13 +1592,8 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
1621 goto found_handler; 1592 goto found_handler;
1622 1593
1623 error = do_ioctl_trans(fd, cmd, arg, filp); 1594 error = do_ioctl_trans(fd, cmd, arg, filp);
1624 if (error == -ENOIOCTLCMD) { 1595 if (error == -ENOIOCTLCMD)
1625 static int count; 1596 error = -ENOTTY;
1626
1627 if (++count <= 50)
1628 compat_ioctl_error(filp, fd, cmd, arg);
1629 error = -EINVAL;
1630 }
1631 1597
1632 goto out_fput; 1598 goto out_fput;
1633 1599
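With compat_ioctl_error() removed, a compat ioctl that has no translation now fails quietly with -ENOTTY instead of -EINVAL, matching the usual errno for an unrecognised ioctl command. A quick user-space check of that convention (the request number is deliberately bogus, and /dev/null is used only because it should reject any ioctl):

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

int main(void)
{
        int fd = open("/dev/null", O_RDONLY);

        if (fd < 0)
                return 1;
        if (ioctl(fd, 0xdeadbeef, 0) < 0)       /* deliberately bogus request */
                printf("ioctl: %s\n", strerror(errno));   /* typically ENOTTY */
        close(fd);
        return 0;
}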
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 82bda8fdfc1c..ede857d20a04 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -63,8 +63,8 @@ extern struct kmem_cache *configfs_dir_cachep;
63 63
64extern int configfs_is_root(struct config_item *item); 64extern int configfs_is_root(struct config_item *item);
65 65
66extern struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent *); 66extern struct inode * configfs_new_inode(umode_t mode, struct configfs_dirent *);
67extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *)); 67extern int configfs_create(struct dentry *, umode_t mode, int (*init)(struct inode *));
68extern int configfs_inode_init(void); 68extern int configfs_inode_init(void);
69extern void configfs_inode_exit(void); 69extern void configfs_inode_exit(void);
70 70
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9a37a9b6de3a..5ddd7ebd9dcd 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -311,8 +311,8 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
311 311
312 if (item->ci_parent) 312 if (item->ci_parent)
313 parent = item->ci_parent->ci_dentry; 313 parent = item->ci_parent->ci_dentry;
314 else if (configfs_mount && configfs_mount->mnt_sb) 314 else if (configfs_mount)
315 parent = configfs_mount->mnt_sb->s_root; 315 parent = configfs_mount->mnt_root;
316 else 316 else
317 return -EFAULT; 317 return -EFAULT;
318 318
@@ -1170,7 +1170,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
1170} 1170}
1171EXPORT_SYMBOL(configfs_undepend_item); 1171EXPORT_SYMBOL(configfs_undepend_item);
1172 1172
1173static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1173static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1174{ 1174{
1175 int ret = 0; 1175 int ret = 0;
1176 int module_got = 0; 1176 int module_got = 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index ca418aaf6352..3ee36d418863 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -116,7 +116,7 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
116 return error; 116 return error;
117} 117}
118 118
119static inline void set_default_inode_attr(struct inode * inode, mode_t mode) 119static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
120{ 120{
121 inode->i_mode = mode; 121 inode->i_mode = mode;
122 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 122 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -132,7 +132,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
132 inode->i_ctime = iattr->ia_ctime; 132 inode->i_ctime = iattr->ia_ctime;
133} 133}
134 134
135struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) 135struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent * sd)
136{ 136{
137 struct inode * inode = new_inode(configfs_sb); 137 struct inode * inode = new_inode(configfs_sb);
138 if (inode) { 138 if (inode) {
@@ -185,7 +185,7 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
185 185
186#endif /* CONFIG_LOCKDEP */ 186#endif /* CONFIG_LOCKDEP */
187 187
188int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) 188int configfs_create(struct dentry * dentry, umode_t mode, int (*init)(struct inode *))
189{ 189{
190 int error = 0; 190 int error = 0;
191 struct inode * inode = NULL; 191 struct inode * inode = NULL;
@@ -292,7 +292,7 @@ int __init configfs_inode_init(void)
292 return bdi_init(&configfs_backing_dev_info); 292 return bdi_init(&configfs_backing_dev_info);
293} 293}
294 294
295void __exit configfs_inode_exit(void) 295void configfs_inode_exit(void)
296{ 296{
297 bdi_destroy(&configfs_backing_dev_info); 297 bdi_destroy(&configfs_backing_dev_info);
298} 298}
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index ecc62178beda..276e15cafd58 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -143,28 +143,26 @@ static int __init configfs_init(void)
143 goto out; 143 goto out;
144 144
145 config_kobj = kobject_create_and_add("config", kernel_kobj); 145 config_kobj = kobject_create_and_add("config", kernel_kobj);
146 if (!config_kobj) { 146 if (!config_kobj)
147 kmem_cache_destroy(configfs_dir_cachep); 147 goto out2;
148 configfs_dir_cachep = NULL; 148
149 goto out; 149 err = configfs_inode_init();
150 } 150 if (err)
151 goto out3;
151 152
152 err = register_filesystem(&configfs_fs_type); 153 err = register_filesystem(&configfs_fs_type);
153 if (err) { 154 if (err)
154 printk(KERN_ERR "configfs: Unable to register filesystem!\n"); 155 goto out4;
155 kobject_put(config_kobj);
156 kmem_cache_destroy(configfs_dir_cachep);
157 configfs_dir_cachep = NULL;
158 goto out;
159 }
160 156
161 err = configfs_inode_init(); 157 return 0;
162 if (err) { 158out4:
163 unregister_filesystem(&configfs_fs_type); 159 printk(KERN_ERR "configfs: Unable to register filesystem!\n");
164 kobject_put(config_kobj); 160 configfs_inode_exit();
165 kmem_cache_destroy(configfs_dir_cachep); 161out3:
166 configfs_dir_cachep = NULL; 162 kobject_put(config_kobj);
167 } 163out2:
164 kmem_cache_destroy(configfs_dir_cachep);
165 configfs_dir_cachep = NULL;
168out: 166out:
169 return err; 167 return err;
170} 168}
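The configfs_init rework above replaces the copy-pasted cleanup blocks with goto unwinding: each failure jumps to a label that undoes exactly the steps already completed, in reverse order. configfs_inode_init() now runs before register_filesystem() so it can be unwound too, and configfs_inode_exit() drops __exit so the error path may call it. The same structure as a free-standing user-space sketch, with invented stand-ins for the resources:

#include <errno.h>
#include <stdlib.h>

static void *cache, *kobj;

static int register_fs(void)                    /* stands in for register_filesystem() */
{
        return 0;
}

static int subsystem_init(void)
{
        int err = -ENOMEM;

        cache = malloc(64);                     /* step 1: the cache */
        if (!cache)
                goto out;

        kobj = malloc(64);                      /* step 2: the kobject */
        if (!kobj)
                goto out2;

        err = register_fs();                    /* step 3: register the filesystem */
        if (err)
                goto out3;

        return 0;                               /* success keeps everything */

out3:
        free(kobj);
        kobj = NULL;
out2:
        free(cache);
        cache = NULL;
out:
        return err;
}

int main(void)
{
        return subsystem_init() ? 1 : 0;
}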
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 739fb59bcdc2..a2ee8f9f5a38 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -20,7 +20,6 @@
20#include <linux/cramfs_fs.h> 20#include <linux/cramfs_fs.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/cramfs_fs_sb.h> 22#include <linux/cramfs_fs_sb.h>
23#include <linux/buffer_head.h>
24#include <linux/vfs.h> 23#include <linux/vfs.h>
25#include <linux/mutex.h> 24#include <linux/mutex.h>
26 25
@@ -378,7 +377,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
378 unsigned long nextoffset; 377 unsigned long nextoffset;
379 char *name; 378 char *name;
380 ino_t ino; 379 ino_t ino;
381 mode_t mode; 380 umode_t mode;
382 int namelen, error; 381 int namelen, error;
383 382
384 mutex_lock(&read_mutex); 383 mutex_lock(&read_mutex);
diff --git a/fs/dcache.c b/fs/dcache.c
index 89509b5a090e..9791b1e7eee4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -38,6 +38,7 @@
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39#include <linux/ratelimit.h> 39#include <linux/ratelimit.h>
40#include "internal.h" 40#include "internal.h"
41#include "mount.h"
41 42
42/* 43/*
43 * Usage: 44 * Usage:
@@ -2451,6 +2452,7 @@ static int prepend_path(const struct path *path,
2451{ 2452{
2452 struct dentry *dentry = path->dentry; 2453 struct dentry *dentry = path->dentry;
2453 struct vfsmount *vfsmnt = path->mnt; 2454 struct vfsmount *vfsmnt = path->mnt;
2455 struct mount *mnt = real_mount(vfsmnt);
2454 bool slash = false; 2456 bool slash = false;
2455 int error = 0; 2457 int error = 0;
2456 2458
@@ -2460,11 +2462,11 @@ static int prepend_path(const struct path *path,
2460 2462
2461 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { 2463 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
2462 /* Global root? */ 2464 /* Global root? */
2463 if (vfsmnt->mnt_parent == vfsmnt) { 2465 if (!mnt_has_parent(mnt))
2464 goto global_root; 2466 goto global_root;
2465 } 2467 dentry = mnt->mnt_mountpoint;
2466 dentry = vfsmnt->mnt_mountpoint; 2468 mnt = mnt->mnt_parent;
2467 vfsmnt = vfsmnt->mnt_parent; 2469 vfsmnt = &mnt->mnt;
2468 continue; 2470 continue;
2469 } 2471 }
2470 parent = dentry->d_parent; 2472 parent = dentry->d_parent;
@@ -2501,7 +2503,7 @@ global_root:
2501 if (!slash) 2503 if (!slash)
2502 error = prepend(buffer, buflen, "/", 1); 2504 error = prepend(buffer, buflen, "/", 1);
2503 if (!error) 2505 if (!error)
2504 error = vfsmnt->mnt_ns ? 1 : 2; 2506 error = real_mount(vfsmnt)->mnt_ns ? 1 : 2;
2505 goto out; 2507 goto out;
2506} 2508}
2507 2509
@@ -2853,31 +2855,6 @@ int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2853 return result; 2855 return result;
2854} 2856}
2855 2857
2856int path_is_under(struct path *path1, struct path *path2)
2857{
2858 struct vfsmount *mnt = path1->mnt;
2859 struct dentry *dentry = path1->dentry;
2860 int res;
2861
2862 br_read_lock(vfsmount_lock);
2863 if (mnt != path2->mnt) {
2864 for (;;) {
2865 if (mnt->mnt_parent == mnt) {
2866 br_read_unlock(vfsmount_lock);
2867 return 0;
2868 }
2869 if (mnt->mnt_parent == path2->mnt)
2870 break;
2871 mnt = mnt->mnt_parent;
2872 }
2873 dentry = mnt->mnt_mountpoint;
2874 }
2875 res = is_subdir(dentry, path2->dentry);
2876 br_read_unlock(vfsmount_lock);
2877 return res;
2878}
2879EXPORT_SYMBOL(path_is_under);
2880
2881void d_genocide(struct dentry *root) 2858void d_genocide(struct dentry *root)
2882{ 2859{
2883 struct dentry *this_parent; 2860 struct dentry *this_parent;
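The prepend_path() hunk above is part of the struct mount split in this series: the parent pointer and mountpoint move out of the public struct vfsmount into the fs-internal struct mount, so the walk converts with real_mount() and tests mnt_has_parent() instead of comparing mnt_parent to itself (path_is_under() is dropped from this file; it lives in fs/namespace.c after the series). For reference, the two helpers come from the new fs/mount.h and look roughly like this:

/* Roughly as defined in fs/mount.h; struct mount embeds its struct vfsmount. */
static inline struct mount *real_mount(struct vfsmount *mnt)
{
        return container_of(mnt, struct mount, mnt);
}

static inline int mnt_has_parent(struct mount *mnt)
{
        return mnt != mnt->mnt_parent;
}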
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 90f76575c056..f65d4455c5e5 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -15,9 +15,11 @@
15 15
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/fs.h> 17#include <linux/fs.h>
18#include <linux/seq_file.h>
18#include <linux/pagemap.h> 19#include <linux/pagemap.h>
19#include <linux/namei.h> 20#include <linux/namei.h>
20#include <linux/debugfs.h> 21#include <linux/debugfs.h>
22#include <linux/io.h>
21 23
22static ssize_t default_read_file(struct file *file, char __user *buf, 24static ssize_t default_read_file(struct file *file, char __user *buf,
23 size_t count, loff_t *ppos) 25 size_t count, loff_t *ppos)
@@ -95,7 +97,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
95 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 97 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
96 * code. 98 * code.
97 */ 99 */
98struct dentry *debugfs_create_u8(const char *name, mode_t mode, 100struct dentry *debugfs_create_u8(const char *name, umode_t mode,
99 struct dentry *parent, u8 *value) 101 struct dentry *parent, u8 *value)
100{ 102{
101 /* if there are no write bits set, make read only */ 103 /* if there are no write bits set, make read only */
@@ -147,7 +149,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
147 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 149 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
148 * code. 150 * code.
149 */ 151 */
150struct dentry *debugfs_create_u16(const char *name, mode_t mode, 152struct dentry *debugfs_create_u16(const char *name, umode_t mode,
151 struct dentry *parent, u16 *value) 153 struct dentry *parent, u16 *value)
152{ 154{
153 /* if there are no write bits set, make read only */ 155 /* if there are no write bits set, make read only */
@@ -199,7 +201,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
199 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 201 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
200 * code. 202 * code.
201 */ 203 */
202struct dentry *debugfs_create_u32(const char *name, mode_t mode, 204struct dentry *debugfs_create_u32(const char *name, umode_t mode,
203 struct dentry *parent, u32 *value) 205 struct dentry *parent, u32 *value)
204{ 206{
205 /* if there are no write bits set, make read only */ 207 /* if there are no write bits set, make read only */
@@ -252,7 +254,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
252 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 254 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
253 * code. 255 * code.
254 */ 256 */
255struct dentry *debugfs_create_u64(const char *name, mode_t mode, 257struct dentry *debugfs_create_u64(const char *name, umode_t mode,
256 struct dentry *parent, u64 *value) 258 struct dentry *parent, u64 *value)
257{ 259{
258 /* if there are no write bits set, make read only */ 260 /* if there are no write bits set, make read only */
@@ -298,7 +300,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set, "0x%016llx\n
298 * @value: a pointer to the variable that the file should read to and write 300 * @value: a pointer to the variable that the file should read to and write
299 * from. 301 * from.
300 */ 302 */
301struct dentry *debugfs_create_x8(const char *name, mode_t mode, 303struct dentry *debugfs_create_x8(const char *name, umode_t mode,
302 struct dentry *parent, u8 *value) 304 struct dentry *parent, u8 *value)
303{ 305{
304 /* if there are no write bits set, make read only */ 306 /* if there are no write bits set, make read only */
@@ -322,7 +324,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x8);
322 * @value: a pointer to the variable that the file should read to and write 324 * @value: a pointer to the variable that the file should read to and write
323 * from. 325 * from.
324 */ 326 */
325struct dentry *debugfs_create_x16(const char *name, mode_t mode, 327struct dentry *debugfs_create_x16(const char *name, umode_t mode,
326 struct dentry *parent, u16 *value) 328 struct dentry *parent, u16 *value)
327{ 329{
328 /* if there are no write bits set, make read only */ 330 /* if there are no write bits set, make read only */
@@ -346,7 +348,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x16);
346 * @value: a pointer to the variable that the file should read to and write 348 * @value: a pointer to the variable that the file should read to and write
347 * from. 349 * from.
348 */ 350 */
349struct dentry *debugfs_create_x32(const char *name, mode_t mode, 351struct dentry *debugfs_create_x32(const char *name, umode_t mode,
350 struct dentry *parent, u32 *value) 352 struct dentry *parent, u32 *value)
351{ 353{
352 /* if there are no write bits set, make read only */ 354 /* if there are no write bits set, make read only */
@@ -370,7 +372,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_x32);
370 * @value: a pointer to the variable that the file should read to and write 372 * @value: a pointer to the variable that the file should read to and write
371 * from. 373 * from.
372 */ 374 */
373struct dentry *debugfs_create_x64(const char *name, mode_t mode, 375struct dentry *debugfs_create_x64(const char *name, umode_t mode,
374 struct dentry *parent, u64 *value) 376 struct dentry *parent, u64 *value)
375{ 377{
376 return debugfs_create_file(name, mode, parent, value, &fops_x64); 378 return debugfs_create_file(name, mode, parent, value, &fops_x64);
@@ -401,7 +403,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
401 * @value: a pointer to the variable that the file should read to and write 403 * @value: a pointer to the variable that the file should read to and write
402 * from. 404 * from.
403 */ 405 */
404struct dentry *debugfs_create_size_t(const char *name, mode_t mode, 406struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
405 struct dentry *parent, size_t *value) 407 struct dentry *parent, size_t *value)
406{ 408{
407 return debugfs_create_file(name, mode, parent, value, &fops_size_t); 409 return debugfs_create_file(name, mode, parent, value, &fops_size_t);
@@ -473,7 +475,7 @@ static const struct file_operations fops_bool = {
473 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 475 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
474 * code. 476 * code.
475 */ 477 */
476struct dentry *debugfs_create_bool(const char *name, mode_t mode, 478struct dentry *debugfs_create_bool(const char *name, umode_t mode,
477 struct dentry *parent, u32 *value) 479 struct dentry *parent, u32 *value)
478{ 480{
479 return debugfs_create_file(name, mode, parent, value, &fops_bool); 481 return debugfs_create_file(name, mode, parent, value, &fops_bool);
@@ -518,10 +520,103 @@ static const struct file_operations fops_blob = {
518 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling 520 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
519 * code. 521 * code.
520 */ 522 */
521struct dentry *debugfs_create_blob(const char *name, mode_t mode, 523struct dentry *debugfs_create_blob(const char *name, umode_t mode,
522 struct dentry *parent, 524 struct dentry *parent,
523 struct debugfs_blob_wrapper *blob) 525 struct debugfs_blob_wrapper *blob)
524{ 526{
525 return debugfs_create_file(name, mode, parent, blob, &fops_blob); 527 return debugfs_create_file(name, mode, parent, blob, &fops_blob);
526} 528}
527EXPORT_SYMBOL_GPL(debugfs_create_blob); 529EXPORT_SYMBOL_GPL(debugfs_create_blob);
530
531#ifdef CONFIG_HAS_IOMEM
532
533/*
534 * The regset32 stuff is used to print 32-bit registers using the
535 * seq_file utilities. We offer printing a register set in an already-opened
536 * sequential file or create a debugfs file that only prints a regset32.
537 */
538
539/**
540 * debugfs_print_regs32 - use seq_print to describe a set of registers
541 * @s: the seq_file structure being used to generate output
542 * @regs: an array of struct debugfs_reg32 structures
543 * @nregs: the length of the above array
544 * @base: the base address to be used in reading the registers
545 * @prefix: a string to be prefixed to every output line
546 *
547 * This function outputs a text block describing the current values of
548 * some 32-bit hardware registers. It is meant to be used within debugfs
549 * files based on seq_file that need to show registers, intermixed with other
550 * information. The prefix argument may be used to specify a leading string,
551 * because some peripherals have several blocks of identical registers,
552 * for example configuration of dma channels
553 */
554int debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
555 int nregs, void __iomem *base, char *prefix)
556{
557 int i, ret = 0;
558
559 for (i = 0; i < nregs; i++, regs++) {
560 if (prefix)
561 ret += seq_printf(s, "%s", prefix);
562 ret += seq_printf(s, "%s = 0x%08x\n", regs->name,
563 readl(base + regs->offset));
564 }
565 return ret;
566}
567EXPORT_SYMBOL_GPL(debugfs_print_regs32);
568
569static int debugfs_show_regset32(struct seq_file *s, void *data)
570{
571 struct debugfs_regset32 *regset = s->private;
572
573 debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");
574 return 0;
575}
576
577static int debugfs_open_regset32(struct inode *inode, struct file *file)
578{
579 return single_open(file, debugfs_show_regset32, inode->i_private);
580}
581
582static const struct file_operations fops_regset32 = {
583 .open = debugfs_open_regset32,
584 .read = seq_read,
585 .llseek = seq_lseek,
586 .release = single_release,
587};
588
589/**
590 * debugfs_create_regset32 - create a debugfs file that returns register values
591 * @name: a pointer to a string containing the name of the file to create.
592 * @mode: the permission that the file should have
593 * @parent: a pointer to the parent dentry for this file. This should be a
594 * directory dentry if set. If this parameter is %NULL, then the
595 * file will be created in the root of the debugfs filesystem.
596 * @regset: a pointer to a struct debugfs_regset32, which contains a pointer
597 * to an array of register definitions, the array size and the base
598 * address where the register bank is to be found.
599 *
600 * This function creates a file in debugfs with the given name that reports
601 * the names and values of a set of 32-bit registers. If the @mode variable
602 * is so set it can be read from. Writing is not supported.
603 *
604 * This function will return a pointer to a dentry if it succeeds. This
605 * pointer must be passed to the debugfs_remove() function when the file is
606 * to be removed (no automatic cleanup happens if your module is unloaded,
607 * you are responsible here.) If an error occurs, %NULL will be returned.
608 *
609 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
610 * returned. It is not wise to check for this value, but rather, check for
611 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
612 * code.
613 */
614struct dentry *debugfs_create_regset32(const char *name, mode_t mode,
615 struct dentry *parent,
616 struct debugfs_regset32 *regset)
617{
618 return debugfs_create_file(name, mode, parent, regset, &fops_regset32);
619}
620EXPORT_SYMBOL_GPL(debugfs_create_regset32);
621
622#endif /* CONFIG_HAS_IOMEM */
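The kernel-doc above describes the new regset32 interface: a driver fills a struct debugfs_regset32 with a table of register names and offsets plus an ioremapped base, and debugfs provides a read-only file that dumps one "name = 0x%08x" line per register. A hedged sketch of a caller, with an entirely made-up device and register layout:

#include <linux/kernel.h>
#include <linux/debugfs.h>

/* Hypothetical register map for an imaginary peripheral. */
static const struct debugfs_reg32 foo_regs[] = {
        { .name = "ctrl",   .offset = 0x00 },
        { .name = "status", .offset = 0x04 },
        { .name = "irq",    .offset = 0x08 },
};

static struct debugfs_regset32 foo_regset;

static void foo_debugfs_init(struct dentry *parent, void __iomem *base)
{
        foo_regset.regs  = foo_regs;
        foo_regset.nregs = ARRAY_SIZE(foo_regs);
        foo_regset.base  = base;

        /* Read-only file; each read prints "name = 0x%08x" per entry. */
        debugfs_create_regset32("registers", S_IRUGO, parent, &foo_regset);
}

Note that debugfs_create_regset32() is still declared with mode_t here, unlike the other helpers in this file, which the same diff converts to umode_t.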
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index f3a257d7a985..956d5ddddf6e 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -30,7 +30,7 @@ static struct vfsmount *debugfs_mount;
30static int debugfs_mount_count; 30static int debugfs_mount_count;
31static bool debugfs_registered; 31static bool debugfs_registered;
32 32
33static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev, 33static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev_t dev,
34 void *data, const struct file_operations *fops) 34 void *data, const struct file_operations *fops)
35 35
36{ 36{
@@ -69,7 +69,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
69 69
70/* SMP-safe */ 70/* SMP-safe */
71static int debugfs_mknod(struct inode *dir, struct dentry *dentry, 71static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
72 int mode, dev_t dev, void *data, 72 umode_t mode, dev_t dev, void *data,
73 const struct file_operations *fops) 73 const struct file_operations *fops)
74{ 74{
75 struct inode *inode; 75 struct inode *inode;
@@ -87,7 +87,7 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry,
87 return error; 87 return error;
88} 88}
89 89
90static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode, 90static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode,
91 void *data, const struct file_operations *fops) 91 void *data, const struct file_operations *fops)
92{ 92{
93 int res; 93 int res;
@@ -101,14 +101,14 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode,
101 return res; 101 return res;
102} 102}
103 103
104static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode, 104static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode,
105 void *data, const struct file_operations *fops) 105 void *data, const struct file_operations *fops)
106{ 106{
107 mode = (mode & S_IALLUGO) | S_IFLNK; 107 mode = (mode & S_IALLUGO) | S_IFLNK;
108 return debugfs_mknod(dir, dentry, mode, 0, data, fops); 108 return debugfs_mknod(dir, dentry, mode, 0, data, fops);
109} 109}
110 110
111static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode, 111static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
112 void *data, const struct file_operations *fops) 112 void *data, const struct file_operations *fops)
113{ 113{
114 int res; 114 int res;
@@ -146,7 +146,7 @@ static struct file_system_type debug_fs_type = {
146 .kill_sb = kill_litter_super, 146 .kill_sb = kill_litter_super,
147}; 147};
148 148
149static int debugfs_create_by_name(const char *name, mode_t mode, 149static int debugfs_create_by_name(const char *name, umode_t mode,
150 struct dentry *parent, 150 struct dentry *parent,
151 struct dentry **dentry, 151 struct dentry **dentry,
152 void *data, 152 void *data,
@@ -160,7 +160,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
160 * have around. 160 * have around.
161 */ 161 */
162 if (!parent) 162 if (!parent)
163 parent = debugfs_mount->mnt_sb->s_root; 163 parent = debugfs_mount->mnt_root;
164 164
165 *dentry = NULL; 165 *dentry = NULL;
166 mutex_lock(&parent->d_inode->i_mutex); 166 mutex_lock(&parent->d_inode->i_mutex);
@@ -214,7 +214,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
214 * If debugfs is not enabled in the kernel, the value -%ENODEV will be 214 * If debugfs is not enabled in the kernel, the value -%ENODEV will be
215 * returned. 215 * returned.
216 */ 216 */
217struct dentry *debugfs_create_file(const char *name, mode_t mode, 217struct dentry *debugfs_create_file(const char *name, umode_t mode,
218 struct dentry *parent, void *data, 218 struct dentry *parent, void *data,
219 const struct file_operations *fops) 219 const struct file_operations *fops)
220{ 220{
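With debugfs_create_file() and the internal creation helpers now taking umode_t, callers keep passing the usual permission macros; only the prototype type changes. A small sketch of a module exposing a tunable through the converted API (all names are made up):

#include <linux/init.h>
#include <linux/debugfs.h>

static u32 foo_threshold = 42;
static struct dentry *foo_dir;

static int __init foo_debugfs_setup(void)
{
        foo_dir = debugfs_create_dir("foo", NULL);
        if (!foo_dir)
                return -ENOMEM;

        /* mode is a umode_t after this series; S_IRUGO | S_IWUSR still works. */
        debugfs_create_u32("threshold", S_IRUGO | S_IWUSR, foo_dir, &foo_threshold);
        return 0;
}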
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d5d5297efe97..c4e2a58a2e82 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -246,9 +246,9 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
246 return err; 246 return err;
247} 247}
248 248
249static int devpts_show_options(struct seq_file *seq, struct vfsmount *vfs) 249static int devpts_show_options(struct seq_file *seq, struct dentry *root)
250{ 250{
251 struct pts_fs_info *fsi = DEVPTS_SB(vfs->mnt_sb); 251 struct pts_fs_info *fsi = DEVPTS_SB(root->d_sb);
252 struct pts_mount_opts *opts = &fsi->mount_opts; 252 struct pts_mount_opts *opts = &fsi->mount_opts;
253 253
254 if (opts->setuid) 254 if (opts->setuid)
@@ -301,7 +301,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
301 301
302 inode = new_inode(s); 302 inode = new_inode(s);
303 if (!inode) 303 if (!inode)
304 goto free_fsi; 304 goto fail;
305 inode->i_ino = 1; 305 inode->i_ino = 1;
306 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 306 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR; 307 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
@@ -316,8 +316,6 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
316 printk(KERN_ERR "devpts: get root dentry failed\n"); 316 printk(KERN_ERR "devpts: get root dentry failed\n");
317 iput(inode); 317 iput(inode);
318 318
319free_fsi:
320 kfree(s->s_fs_info);
321fail: 319fail:
322 return -ENOMEM; 320 return -ENOMEM;
323} 321}
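The devpts_show_options() change is one instance of a tree-wide conversion in this series: the super_operations ->show_options() hook now takes the filesystem's root dentry instead of a vfsmount, so implementations reach their private data through root->d_sb rather than mnt->mnt_sb. A minimal sketch of a converted handler for a hypothetical filesystem with a hypothetical option:

#include <linux/fs.h>
#include <linux/seq_file.h>

struct examplefs_sb_info {
        int opt_verbose;                /* hypothetical mount option */
};

static int examplefs_show_options(struct seq_file *m, struct dentry *root)
{
        struct examplefs_sb_info *sbi = root->d_sb->s_fs_info;

        if (sbi->opt_verbose)
                seq_puts(m, ",verbose");
        return 0;
}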
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 990626e7da80..0b3109ee4257 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -281,7 +281,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
281 } else { 281 } else {
282 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; 282 struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr;
283 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; 283 struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
284 ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); 284 ret6->sin6_addr = in6->sin6_addr;
285 } 285 }
286 286
287 return 0; 287 return 0;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 32f90a3ae63e..19a8ca4ab1dd 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -144,24 +144,6 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
144} 144}
145 145
146/** 146/**
147 * ecryptfs_create_underlying_file
148 * @lower_dir_inode: inode of the parent in the lower fs of the new file
149 * @dentry: New file's dentry
150 * @mode: The mode of the new file
151 *
152 * Creates the file in the lower file system.
153 *
154 * Returns zero on success; non-zero on error condition
155 */
156static int
157ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
158 struct dentry *dentry, int mode)
159{
160 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
161 return vfs_create(lower_dir_inode, lower_dentry, mode, NULL);
162}
163
164/**
165 * ecryptfs_do_create 147 * ecryptfs_do_create
166 * @directory_inode: inode of the new file's dentry's parent in ecryptfs 148 * @directory_inode: inode of the new file's dentry's parent in ecryptfs
167 * @ecryptfs_dentry: New file's dentry in ecryptfs 149 * @ecryptfs_dentry: New file's dentry in ecryptfs
@@ -176,7 +158,7 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
176 */ 158 */
177static struct inode * 159static struct inode *
178ecryptfs_do_create(struct inode *directory_inode, 160ecryptfs_do_create(struct inode *directory_inode,
179 struct dentry *ecryptfs_dentry, int mode) 161 struct dentry *ecryptfs_dentry, umode_t mode)
180{ 162{
181 int rc; 163 int rc;
182 struct dentry *lower_dentry; 164 struct dentry *lower_dentry;
@@ -191,8 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode,
191 inode = ERR_CAST(lower_dir_dentry); 173 inode = ERR_CAST(lower_dir_dentry);
192 goto out; 174 goto out;
193 } 175 }
194 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, 176 rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL);
195 ecryptfs_dentry, mode);
196 if (rc) { 177 if (rc) {
197 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 178 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
198 "rc = [%d]\n", __func__, rc); 179 "rc = [%d]\n", __func__, rc);
@@ -267,7 +248,7 @@ out:
267 */ 248 */
268static int 249static int
269ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, 250ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
270 int mode, struct nameidata *nd) 251 umode_t mode, struct nameidata *nd)
271{ 252{
272 struct inode *ecryptfs_inode; 253 struct inode *ecryptfs_inode;
273 int rc; 254 int rc;
@@ -559,7 +540,7 @@ out_lock:
559 return rc; 540 return rc;
560} 541}
561 542
562static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 543static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
563{ 544{
564 int rc; 545 int rc;
565 struct dentry *lower_dentry; 546 struct dentry *lower_dentry;
@@ -607,7 +588,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
607} 588}
608 589
609static int 590static int
610ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 591ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
611{ 592{
612 int rc; 593 int rc;
613 struct dentry *lower_dentry; 594 struct dentry *lower_dentry;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index dbd52d40df4c..9df7fd6e0c39 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -69,7 +69,6 @@ static void ecryptfs_i_callback(struct rcu_head *head)
69 struct ecryptfs_inode_info *inode_info; 69 struct ecryptfs_inode_info *inode_info;
70 inode_info = ecryptfs_inode_to_private(inode); 70 inode_info = ecryptfs_inode_to_private(inode);
71 71
72 INIT_LIST_HEAD(&inode->i_dentry);
73 kmem_cache_free(ecryptfs_inode_info_cache, inode_info); 72 kmem_cache_free(ecryptfs_inode_info_cache, inode_info);
74} 73}
75 74
@@ -132,9 +131,9 @@ static void ecryptfs_evict_inode(struct inode *inode)
132 * Prints the mount options for a given superblock. 131 * Prints the mount options for a given superblock.
133 * Returns zero; does not fail. 132 * Returns zero; does not fail.
134 */ 133 */
135static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt) 134static int ecryptfs_show_options(struct seq_file *m, struct dentry *root)
136{ 135{
137 struct super_block *sb = mnt->mnt_sb; 136 struct super_block *sb = root->d_sb;
138 struct ecryptfs_mount_crypt_stat *mount_crypt_stat = 137 struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
139 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat; 138 &ecryptfs_superblock_to_private(sb)->mount_crypt_stat;
140 struct ecryptfs_global_auth_tok *walker; 139 struct ecryptfs_global_auth_tok *walker;
diff --git a/fs/efs/super.c b/fs/efs/super.c
index 0f31acb0131c..981106429a9f 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -68,7 +68,6 @@ static struct inode *efs_alloc_inode(struct super_block *sb)
68static void efs_i_callback(struct rcu_head *head) 68static void efs_i_callback(struct rcu_head *head)
69{ 69{
70 struct inode *inode = container_of(head, struct inode, i_rcu); 70 struct inode *inode = container_of(head, struct inode, i_rcu);
71 INIT_LIST_HEAD(&inode->i_dentry);
72 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); 71 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
73} 72}
74 73
diff --git a/fs/exec.c b/fs/exec.c
index 36254645b7cc..3f64b9f26e7d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1225,7 +1225,7 @@ EXPORT_SYMBOL(install_exec_creds);
1225 * - the caller must hold ->cred_guard_mutex to protect against 1225 * - the caller must hold ->cred_guard_mutex to protect against
1226 * PTRACE_ATTACH 1226 * PTRACE_ATTACH
1227 */ 1227 */
1228int check_unsafe_exec(struct linux_binprm *bprm) 1228static int check_unsafe_exec(struct linux_binprm *bprm)
1229{ 1229{
1230 struct task_struct *p = current, *t; 1230 struct task_struct *p = current, *t;
1231 unsigned n_fs; 1231 unsigned n_fs;
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index d0941c6a1f72..80405836ba6e 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -234,7 +234,7 @@ static unsigned char exofs_type_by_mode[S_IFMT >> S_SHIFT] = {
234static inline 234static inline
235void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode) 235void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode)
236{ 236{
237 mode_t mode = inode->i_mode; 237 umode_t mode = inode->i_mode;
238 de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 238 de->file_type = exofs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
239} 239}
240 240
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 51f4b4c40f09..ca9d49665ef6 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -154,7 +154,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
154 loff_t pos, unsigned len, unsigned flags, 154 loff_t pos, unsigned len, unsigned flags,
155 struct page **pagep, void **fsdata); 155 struct page **pagep, void **fsdata);
156extern struct inode *exofs_iget(struct super_block *, unsigned long); 156extern struct inode *exofs_iget(struct super_block *, unsigned long);
157struct inode *exofs_new_inode(struct inode *, int); 157struct inode *exofs_new_inode(struct inode *, umode_t);
158extern int exofs_write_inode(struct inode *, struct writeback_control *wbc); 158extern int exofs_write_inode(struct inode *, struct writeback_control *wbc);
159extern void exofs_evict_inode(struct inode *); 159extern void exofs_evict_inode(struct inode *);
160 160
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index f6dbf7768ce6..ea5e1f97806a 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1276,7 +1276,7 @@ static void create_done(struct ore_io_state *ios, void *p)
1276/* 1276/*
1277 * Set up a new inode and create an object for it on the OSD 1277 * Set up a new inode and create an object for it on the OSD
1278 */ 1278 */
1279struct inode *exofs_new_inode(struct inode *dir, int mode) 1279struct inode *exofs_new_inode(struct inode *dir, umode_t mode)
1280{ 1280{
1281 struct super_block *sb = dir->i_sb; 1281 struct super_block *sb = dir->i_sb;
1282 struct exofs_sb_info *sbi = sb->s_fs_info; 1282 struct exofs_sb_info *sbi = sb->s_fs_info;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b54c43775f17..9dbf0c301030 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -59,7 +59,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
59 return d_splice_alias(inode, dentry); 59 return d_splice_alias(inode, dentry);
60} 60}
61 61
62static int exofs_create(struct inode *dir, struct dentry *dentry, int mode, 62static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
63 struct nameidata *nd) 63 struct nameidata *nd)
64{ 64{
65 struct inode *inode = exofs_new_inode(dir, mode); 65 struct inode *inode = exofs_new_inode(dir, mode);
@@ -74,7 +74,7 @@ static int exofs_create(struct inode *dir, struct dentry *dentry, int mode,
74 return err; 74 return err;
75} 75}
76 76
77static int exofs_mknod(struct inode *dir, struct dentry *dentry, int mode, 77static int exofs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
78 dev_t rdev) 78 dev_t rdev)
79{ 79{
80 struct inode *inode; 80 struct inode *inode;
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
153 return exofs_add_nondir(dentry, inode); 153 return exofs_add_nondir(dentry, inode);
154} 154}
155 155
156static int exofs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 156static int exofs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
157{ 157{
158 struct inode *inode; 158 struct inode *inode;
159 int err = -EMLINK; 159 int err = -EMLINK;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index e6085ec192d6..d22cd168c6ee 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -166,7 +166,6 @@ static struct inode *exofs_alloc_inode(struct super_block *sb)
166static void exofs_i_callback(struct rcu_head *head) 166static void exofs_i_callback(struct rcu_head *head)
167{ 167{
168 struct inode *inode = container_of(head, struct inode, i_rcu); 168 struct inode *inode = container_of(head, struct inode, i_rcu);
169 INIT_LIST_HEAD(&inode->i_dentry);
170 kmem_cache_free(exofs_inode_cachep, exofs_i(inode)); 169 kmem_cache_free(exofs_inode_cachep, exofs_i(inode));
171} 170}
172 171
@@ -839,6 +838,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
839 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY); 838 ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
840 if (ret) { 839 if (ret) {
841 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n"); 840 EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
841 dput(sb->s_root);
842 sb->s_root = NULL;
842 goto free_sbi; 843 goto free_sbi;
843 } 844 }
844 845
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 47cda410b548..d37df352d324 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -279,7 +279,7 @@ static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = {
279 279
280static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) 280static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode)
281{ 281{
282 mode_t mode = inode->i_mode; 282 umode_t mode = inode->i_mode;
283 if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE)) 283 if (EXT2_HAS_INCOMPAT_FEATURE(inode->i_sb, EXT2_FEATURE_INCOMPAT_FILETYPE))
284 de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 284 de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
285 else 285 else
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 9a4e5e206d08..75ad433c6691 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); 110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
111 111
112/* ialloc.c */ 112/* ialloc.c */
113extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *); 113extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *);
114extern void ext2_free_inode (struct inode *); 114extern void ext2_free_inode (struct inode *);
115extern unsigned long ext2_count_free_inodes (struct super_block *); 115extern unsigned long ext2_count_free_inodes (struct super_block *);
116extern void ext2_check_inodes_bitmap (struct super_block *); 116extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index c4e81dfb74ba..cd7f5f424a75 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,7 @@ found:
429 return group; 429 return group;
430} 430}
431 431
432struct inode *ext2_new_inode(struct inode *dir, int mode, 432struct inode *ext2_new_inode(struct inode *dir, umode_t mode,
433 const struct qstr *qstr) 433 const struct qstr *qstr)
434{ 434{
435 struct super_block *sb; 435 struct super_block *sb;
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index f81e250ac5c4..1089f760c847 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -35,7 +35,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
35 case EXT2_IOC_SETFLAGS: { 35 case EXT2_IOC_SETFLAGS: {
36 unsigned int oldflags; 36 unsigned int oldflags;
37 37
38 ret = mnt_want_write(filp->f_path.mnt); 38 ret = mnt_want_write_file(filp);
39 if (ret) 39 if (ret)
40 return ret; 40 return ret;
41 41
@@ -83,7 +83,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
83 inode->i_ctime = CURRENT_TIME_SEC; 83 inode->i_ctime = CURRENT_TIME_SEC;
84 mark_inode_dirty(inode); 84 mark_inode_dirty(inode);
85setflags_out: 85setflags_out:
86 mnt_drop_write(filp->f_path.mnt); 86 mnt_drop_write_file(filp);
87 return ret; 87 return ret;
88 } 88 }
89 case EXT2_IOC_GETVERSION: 89 case EXT2_IOC_GETVERSION:
@@ -91,7 +91,7 @@ setflags_out:
91 case EXT2_IOC_SETVERSION: 91 case EXT2_IOC_SETVERSION:
92 if (!inode_owner_or_capable(inode)) 92 if (!inode_owner_or_capable(inode))
93 return -EPERM; 93 return -EPERM;
94 ret = mnt_want_write(filp->f_path.mnt); 94 ret = mnt_want_write_file(filp);
95 if (ret) 95 if (ret)
96 return ret; 96 return ret;
97 if (get_user(inode->i_generation, (int __user *) arg)) { 97 if (get_user(inode->i_generation, (int __user *) arg)) {
@@ -100,7 +100,7 @@ setflags_out:
100 inode->i_ctime = CURRENT_TIME_SEC; 100 inode->i_ctime = CURRENT_TIME_SEC;
101 mark_inode_dirty(inode); 101 mark_inode_dirty(inode);
102 } 102 }
103 mnt_drop_write(filp->f_path.mnt); 103 mnt_drop_write_file(filp);
104 return ret; 104 return ret;
105 case EXT2_IOC_GETRSVSZ: 105 case EXT2_IOC_GETRSVSZ:
106 if (test_opt(inode->i_sb, RESERVATION) 106 if (test_opt(inode->i_sb, RESERVATION)
@@ -121,7 +121,7 @@ setflags_out:
121 if (get_user(rsv_window_size, (int __user *)arg)) 121 if (get_user(rsv_window_size, (int __user *)arg))
122 return -EFAULT; 122 return -EFAULT;
123 123
124 ret = mnt_want_write(filp->f_path.mnt); 124 ret = mnt_want_write_file(filp);
125 if (ret) 125 if (ret)
126 return ret; 126 return ret;
127 127
@@ -145,7 +145,7 @@ setflags_out:
145 rsv->rsv_goal_size = rsv_window_size; 145 rsv->rsv_goal_size = rsv_window_size;
146 } 146 }
147 mutex_unlock(&ei->truncate_mutex); 147 mutex_unlock(&ei->truncate_mutex);
148 mnt_drop_write(filp->f_path.mnt); 148 mnt_drop_write_file(filp);
149 return 0; 149 return 0;
150 } 150 }
151 default: 151 default:
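Every mnt_want_write()/mnt_drop_write() pair in these ioctl handlers is converted to the file-based variants, which take the struct file directly instead of making the caller dig out filp->f_path.mnt. A sketch of the resulting shape of such a handler; the command and the field being written are illustrative only:

#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/uaccess.h>

static long examplefs_ioc_setversion(struct file *filp, int __user *argp)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        int err;

        err = mnt_want_write_file(filp);   /* was mnt_want_write(filp->f_path.mnt) */
        if (err)
                return err;

        if (get_user(inode->i_generation, argp))
                err = -EFAULT;
        else
                mark_inode_dirty(inode);

        mnt_drop_write_file(filp);         /* was mnt_drop_write(filp->f_path.mnt) */
        return err;
}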
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 761fde807fc9..080419814bae 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child)
94 * If the create succeeds, we fill in the inode information 94 * If the create succeeds, we fill in the inode information
95 * with d_instantiate(). 95 * with d_instantiate().
96 */ 96 */
97static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) 97static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
98{ 98{
99 struct inode *inode; 99 struct inode *inode;
100 100
@@ -119,7 +119,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
119 return ext2_add_nondir(dentry, inode); 119 return ext2_add_nondir(dentry, inode);
120} 120}
121 121
122static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 122static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
123{ 123{
124 struct inode * inode; 124 struct inode * inode;
125 int err; 125 int err;
@@ -214,7 +214,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
214 return err; 214 return err;
215} 215}
216 216
217static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) 217static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
218{ 218{
219 struct inode * inode; 219 struct inode * inode;
220 int err = -EMLINK; 220 int err = -EMLINK;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index bd8ac164a3bf..9b403f064ce0 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -173,7 +173,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb)
173static void ext2_i_callback(struct rcu_head *head) 173static void ext2_i_callback(struct rcu_head *head)
174{ 174{
175 struct inode *inode = container_of(head, struct inode, i_rcu); 175 struct inode *inode = container_of(head, struct inode, i_rcu);
176 INIT_LIST_HEAD(&inode->i_dentry);
177 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); 176 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
178} 177}
179 178
@@ -211,9 +210,9 @@ static void destroy_inodecache(void)
211 kmem_cache_destroy(ext2_inode_cachep); 210 kmem_cache_destroy(ext2_inode_cachep);
212} 211}
213 212
214static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) 213static int ext2_show_options(struct seq_file *seq, struct dentry *root)
215{ 214{
216 struct super_block *sb = vfs->mnt_sb; 215 struct super_block *sb = root->d_sb;
217 struct ext2_sb_info *sbi = EXT2_SB(sb); 216 struct ext2_sb_info *sbi = EXT2_SB(sb);
218 struct ext2_super_block *es = sbi->s_es; 217 struct ext2_super_block *es = sbi->s_es;
219 unsigned long def_mount_opts; 218 unsigned long def_mount_opts;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 5c866e06e7ab..92cc86dfa23d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -371,7 +371,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
371 * group to find a free inode. 371 * group to find a free inode.
372 */ 372 */
373struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, 373struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
374 const struct qstr *qstr, int mode) 374 const struct qstr *qstr, umode_t mode)
375{ 375{
376 struct super_block *sb; 376 struct super_block *sb;
377 struct buffer_head *bitmap_bh = NULL; 377 struct buffer_head *bitmap_bh = NULL;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 85fe655fe3e0..15cb47088aac 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2490,7 +2490,7 @@ int ext3_can_truncate(struct inode *inode)
2490 * transaction, and VFS/VM ensures that ext3_truncate() cannot run 2490 * transaction, and VFS/VM ensures that ext3_truncate() cannot run
2491 * simultaneously on behalf of the same inode. 2491 * simultaneously on behalf of the same inode.
2492 * 2492 *
2493 * As we work through the truncate and commmit bits of it to the journal there 2493 * As we work through the truncate and commit bits of it to the journal there
2494 * is one core, guiding principle: the file's tree must always be consistent on 2494 * is one core, guiding principle: the file's tree must always be consistent on
2495 * disk. We must be able to restart the truncate after a crash. 2495 * disk. We must be able to restart the truncate after a crash.
2496 * 2496 *
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index ba1b54e23cae..8e37c41a071b 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -44,7 +44,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
45 return -EFAULT; 45 return -EFAULT;
46 46
47 err = mnt_want_write(filp->f_path.mnt); 47 err = mnt_want_write_file(filp);
48 if (err) 48 if (err)
49 return err; 49 return err;
50 50
@@ -110,7 +110,7 @@ flags_err:
110 err = ext3_change_inode_journal_flag(inode, jflag); 110 err = ext3_change_inode_journal_flag(inode, jflag);
111flags_out: 111flags_out:
112 mutex_unlock(&inode->i_mutex); 112 mutex_unlock(&inode->i_mutex);
113 mnt_drop_write(filp->f_path.mnt); 113 mnt_drop_write_file(filp);
114 return err; 114 return err;
115 } 115 }
116 case EXT3_IOC_GETVERSION: 116 case EXT3_IOC_GETVERSION:
@@ -126,7 +126,7 @@ flags_out:
126 if (!inode_owner_or_capable(inode)) 126 if (!inode_owner_or_capable(inode))
127 return -EPERM; 127 return -EPERM;
128 128
129 err = mnt_want_write(filp->f_path.mnt); 129 err = mnt_want_write_file(filp);
130 if (err) 130 if (err)
131 return err; 131 return err;
132 if (get_user(generation, (int __user *) arg)) { 132 if (get_user(generation, (int __user *) arg)) {
@@ -147,7 +147,7 @@ flags_out:
147 } 147 }
148 ext3_journal_stop(handle); 148 ext3_journal_stop(handle);
149setversion_out: 149setversion_out:
150 mnt_drop_write(filp->f_path.mnt); 150 mnt_drop_write_file(filp);
151 return err; 151 return err;
152 } 152 }
153 case EXT3_IOC_GETRSVSZ: 153 case EXT3_IOC_GETRSVSZ:
@@ -164,7 +164,7 @@ setversion_out:
164 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 164 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
165 return -ENOTTY; 165 return -ENOTTY;
166 166
167 err = mnt_want_write(filp->f_path.mnt); 167 err = mnt_want_write_file(filp);
168 if (err) 168 if (err)
169 return err; 169 return err;
170 170
@@ -195,7 +195,7 @@ setversion_out:
195 } 195 }
196 mutex_unlock(&ei->truncate_mutex); 196 mutex_unlock(&ei->truncate_mutex);
197setrsvsz_out: 197setrsvsz_out:
198 mnt_drop_write(filp->f_path.mnt); 198 mnt_drop_write_file(filp);
199 return err; 199 return err;
200 } 200 }
201 case EXT3_IOC_GROUP_EXTEND: { 201 case EXT3_IOC_GROUP_EXTEND: {
@@ -206,7 +206,7 @@ setrsvsz_out:
206 if (!capable(CAP_SYS_RESOURCE)) 206 if (!capable(CAP_SYS_RESOURCE))
207 return -EPERM; 207 return -EPERM;
208 208
209 err = mnt_want_write(filp->f_path.mnt); 209 err = mnt_want_write_file(filp);
210 if (err) 210 if (err)
211 return err; 211 return err;
212 212
@@ -221,7 +221,7 @@ setrsvsz_out:
221 if (err == 0) 221 if (err == 0)
222 err = err2; 222 err = err2;
223group_extend_out: 223group_extend_out:
224 mnt_drop_write(filp->f_path.mnt); 224 mnt_drop_write_file(filp);
225 return err; 225 return err;
226 } 226 }
227 case EXT3_IOC_GROUP_ADD: { 227 case EXT3_IOC_GROUP_ADD: {
@@ -232,7 +232,7 @@ group_extend_out:
232 if (!capable(CAP_SYS_RESOURCE)) 232 if (!capable(CAP_SYS_RESOURCE))
233 return -EPERM; 233 return -EPERM;
234 234
235 err = mnt_want_write(filp->f_path.mnt); 235 err = mnt_want_write_file(filp);
236 if (err) 236 if (err)
237 return err; 237 return err;
238 238
@@ -249,7 +249,7 @@ group_extend_out:
249 if (err == 0) 249 if (err == 0)
250 err = err2; 250 err = err2;
251group_add_out: 251group_add_out:
252 mnt_drop_write(filp->f_path.mnt); 252 mnt_drop_write_file(filp);
253 return err; 253 return err;
254 } 254 }
255 case FITRIM: { 255 case FITRIM: {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 642dc6d66dfd..d269821203fd 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1698,7 +1698,7 @@ static int ext3_add_nondir(handle_t *handle,
1698 * If the create succeeds, we fill in the inode information 1698 * If the create succeeds, we fill in the inode information
1699 * with d_instantiate(). 1699 * with d_instantiate().
1700 */ 1700 */
1701static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, 1701static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode,
1702 struct nameidata *nd) 1702 struct nameidata *nd)
1703{ 1703{
1704 handle_t *handle; 1704 handle_t *handle;
@@ -1732,7 +1732,7 @@ retry:
1732} 1732}
1733 1733
1734static int ext3_mknod (struct inode * dir, struct dentry *dentry, 1734static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1735 int mode, dev_t rdev) 1735 umode_t mode, dev_t rdev)
1736{ 1736{
1737 handle_t *handle; 1737 handle_t *handle;
1738 struct inode *inode; 1738 struct inode *inode;
@@ -1768,7 +1768,7 @@ retry:
1768 return err; 1768 return err;
1769} 1769}
1770 1770
1771static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) 1771static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
1772{ 1772{
1773 handle_t *handle; 1773 handle_t *handle;
1774 struct inode * inode; 1774 struct inode * inode;
@@ -2272,7 +2272,7 @@ retry:
2272 err = PTR_ERR(handle); 2272 err = PTR_ERR(handle);
2273 goto err_drop_inode; 2273 goto err_drop_inode;
2274 } 2274 }
2275 inc_nlink(inode); 2275 set_nlink(inode, 1);
2276 err = ext3_orphan_del(handle, inode); 2276 err = ext3_orphan_del(handle, inode);
2277 if (err) { 2277 if (err) {
2278 ext3_journal_stop(handle); 2278 ext3_journal_stop(handle);
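The inc_nlink() to set_nlink() switch above sits in ext3_symlink()'s long-symlink recovery path, where i_nlink was deliberately dropped to zero while the inode sat on the orphan list; once the link body is safely written, the code now restores the known absolute count with set_nlink() rather than incrementing a count that is currently zero. A tiny sketch of the idiom, with a hypothetical helper name:

#include <linux/fs.h>

/*
 * Sketch only: i_nlink was parked at 0 around an orphan-list add; restore
 * the real count with set_nlink() rather than inc_nlink().
 */
static void examplefs_unorphan(struct inode *inode)
{
        set_nlink(inode, 1);
        mark_inode_dirty(inode);
}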
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 922d289aeeb3..3a10b884e1be 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -511,7 +511,6 @@ static int ext3_drop_inode(struct inode *inode)
511static void ext3_i_callback(struct rcu_head *head) 511static void ext3_i_callback(struct rcu_head *head)
512{ 512{
513 struct inode *inode = container_of(head, struct inode, i_rcu); 513 struct inode *inode = container_of(head, struct inode, i_rcu);
514 INIT_LIST_HEAD(&inode->i_dentry);
515 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 514 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
516} 515}
517 516
@@ -611,9 +610,9 @@ static char *data_mode_string(unsigned long mode)
611 * - it's set to a non-default value OR 610 * - it's set to a non-default value OR
612 * - if the per-sb default is different from the global default 611 * - if the per-sb default is different from the global default
613 */ 612 */
614static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) 613static int ext3_show_options(struct seq_file *seq, struct dentry *root)
615{ 614{
616 struct super_block *sb = vfs->mnt_sb; 615 struct super_block *sb = root->d_sb;
617 struct ext3_sb_info *sbi = EXT3_SB(sb); 616 struct ext3_sb_info *sbi = EXT3_SB(sb);
618 struct ext3_super_block *es = sbi->s_es; 617 struct ext3_super_block *es = sbi->s_es;
619 unsigned long def_mount_opts; 618 unsigned long def_mount_opts;
@@ -2910,7 +2909,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2910 return -EINVAL; 2909 return -EINVAL;
2911 2910
2912 /* Quotafile not on the same filesystem? */ 2911 /* Quotafile not on the same filesystem? */
2913 if (path->mnt->mnt_sb != sb) 2912 if (path->dentry->d_sb != sb)
2914 return -EXDEV; 2913 return -EXDEV;
2915 /* Journaling quota? */ 2914 /* Journaling quota? */
2916 if (EXT3_SB(sb)->s_qf_names[type]) { 2915 if (EXT3_SB(sb)->s_qf_names[type]) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b0e26a1272d..1554b15f91bc 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1819,7 +1819,7 @@ extern int ext4fs_dirhash(const char *name, int len, struct
1819 dx_hash_info *hinfo); 1819 dx_hash_info *hinfo);
1820 1820
1821/* ialloc.c */ 1821/* ialloc.c */
1822extern struct inode *ext4_new_inode(handle_t *, struct inode *, int, 1822extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t,
1823 const struct qstr *qstr, __u32 goal, 1823 const struct qstr *qstr, __u32 goal,
1824 uid_t *owner); 1824 uid_t *owner);
1825extern void ext4_free_inode(handle_t *, struct inode *); 1825extern void ext4_free_inode(handle_t *, struct inode *);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 61fa9e1614af..607b1557d292 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1095,7 +1095,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1095 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block), 1095 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
1096 ext4_idx_pblock(EXT_FIRST_INDEX(neh))); 1096 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
1097 1097
1098 neh->eh_depth = cpu_to_le16(neh->eh_depth + 1); 1098 neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
1099 ext4_mark_inode_dirty(handle, inode); 1099 ext4_mark_inode_dirty(handle, inode);
1100out: 1100out:
1101 brelse(bh); 1101 brelse(bh);
@@ -2955,7 +2955,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2955 /* Pre-conditions */ 2955 /* Pre-conditions */
2956 BUG_ON(!ext4_ext_is_uninitialized(ex)); 2956 BUG_ON(!ext4_ext_is_uninitialized(ex));
2957 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len)); 2957 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
2958 BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
2959 2958
2960 /* 2959 /*
2961 * Attempt to transfer newly initialized blocks from the currently 2960 * Attempt to transfer newly initialized blocks from the currently
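The eh_depth change in ext4_ext_grow_indepth() is an endianness fix: the field is a little-endian __le16 on disk, and the old code incremented the raw LE value before converting again, which corrupts the depth on big-endian hosts. The safe pattern is convert to CPU order, do the arithmetic, convert back; a self-contained sketch with a hypothetical field:

#include <linux/types.h>
#include <asm/byteorder.h>

/* Sketch only: increment an on-disk little-endian 16-bit counter safely. */
static void bump_le16(__le16 *field)
{
        *field = cpu_to_le16(le16_to_cpu(*field) + 1);
}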
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 00beb4f9cc4f..4637af036d9c 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -351,7 +351,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
351 */ 351 */
352 352
353static int find_group_orlov(struct super_block *sb, struct inode *parent, 353static int find_group_orlov(struct super_block *sb, struct inode *parent,
354 ext4_group_t *group, int mode, 354 ext4_group_t *group, umode_t mode,
355 const struct qstr *qstr) 355 const struct qstr *qstr)
356{ 356{
357 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 357 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
@@ -497,7 +497,7 @@ fallback_retry:
497} 497}
498 498
499static int find_group_other(struct super_block *sb, struct inode *parent, 499static int find_group_other(struct super_block *sb, struct inode *parent,
500 ext4_group_t *group, int mode) 500 ext4_group_t *group, umode_t mode)
501{ 501{
502 ext4_group_t parent_group = EXT4_I(parent)->i_block_group; 502 ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
503 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); 503 ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
@@ -602,7 +602,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
602 */ 602 */
603static int ext4_claim_inode(struct super_block *sb, 603static int ext4_claim_inode(struct super_block *sb,
604 struct buffer_head *inode_bitmap_bh, 604 struct buffer_head *inode_bitmap_bh,
605 unsigned long ino, ext4_group_t group, int mode) 605 unsigned long ino, ext4_group_t group, umode_t mode)
606{ 606{
607 int free = 0, retval = 0, count; 607 int free = 0, retval = 0, count;
608 struct ext4_sb_info *sbi = EXT4_SB(sb); 608 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -690,7 +690,7 @@ err_ret:
690 * For other inodes, search forward from the parent directory's block 690 * For other inodes, search forward from the parent directory's block
691 * group to find a free inode. 691 * group to find a free inode.
692 */ 692 */
693struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode, 693struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode,
694 const struct qstr *qstr, __u32 goal, uid_t *owner) 694 const struct qstr *qstr, __u32 goal, uid_t *owner)
695{ 695{
696 struct super_block *sb; 696 struct super_block *sb;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 848f436df29f..7dbcc3e84570 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1339,8 +1339,11 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
1339 clear_buffer_unwritten(bh); 1339 clear_buffer_unwritten(bh);
1340 } 1340 }
1341 1341
1342 /* skip page if block allocation undone */ 1342 /*
1343 if (buffer_delay(bh) || buffer_unwritten(bh)) 1343 * skip page if block allocation undone and
1344 * block is dirty
1345 */
1346 if (ext4_bh_delay_or_unwritten(NULL, bh))
1344 skip_page = 1; 1347 skip_page = 1;
1345 bh = bh->b_this_page; 1348 bh = bh->b_this_page;
1346 block_start += bh->b_size; 1349 block_start += bh->b_size;
@@ -1878,7 +1881,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
1878 * a[0] = 'a'; 1881 * a[0] = 'a';
1879 * truncate(f, 4096); 1882 * truncate(f, 4096);
1880 * we have in the page first buffer_head mapped via page_mkwrite call back 1883 * we have in the page first buffer_head mapped via page_mkwrite call back
1881 * but other bufer_heads would be unmapped but dirty(dirty done via the 1884 * but other buffer_heads would be unmapped but dirty (dirty done via the
1882 * do_wp_page). So writepage should write the first block. If we modify 1885 * do_wp_page). So writepage should write the first block. If we modify
1883 * the mmap area beyond 1024 we will again get a page_fault and the 1886 * the mmap area beyond 1024 we will again get a page_fault and the
1884 * page_mkwrite callback will do the block allocation and mark the 1887 * page_mkwrite callback will do the block allocation and mark the
@@ -2387,7 +2390,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2387 pgoff_t index; 2390 pgoff_t index;
2388 struct inode *inode = mapping->host; 2391 struct inode *inode = mapping->host;
2389 handle_t *handle; 2392 handle_t *handle;
2390 loff_t page_len;
2391 2393
2392 index = pos >> PAGE_CACHE_SHIFT; 2394 index = pos >> PAGE_CACHE_SHIFT;
2393 2395
@@ -2434,13 +2436,6 @@ retry:
2434 */ 2436 */
2435 if (pos + len > inode->i_size) 2437 if (pos + len > inode->i_size)
2436 ext4_truncate_failed_write(inode); 2438 ext4_truncate_failed_write(inode);
2437 } else {
2438 page_len = pos & (PAGE_CACHE_SIZE - 1);
2439 if (page_len > 0) {
2440 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2441 inode, page, pos - page_len, page_len,
2442 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2443 }
2444 } 2439 }
2445 2440
2446 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 2441 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -2483,7 +2478,6 @@ static int ext4_da_write_end(struct file *file,
2483 loff_t new_i_size; 2478 loff_t new_i_size;
2484 unsigned long start, end; 2479 unsigned long start, end;
2485 int write_mode = (int)(unsigned long)fsdata; 2480 int write_mode = (int)(unsigned long)fsdata;
2486 loff_t page_len;
2487 2481
2488 if (write_mode == FALL_BACK_TO_NONDELALLOC) { 2482 if (write_mode == FALL_BACK_TO_NONDELALLOC) {
2489 if (ext4_should_order_data(inode)) { 2483 if (ext4_should_order_data(inode)) {
@@ -2508,7 +2502,7 @@ static int ext4_da_write_end(struct file *file,
2508 */ 2502 */
2509 2503
2510 new_i_size = pos + copied; 2504 new_i_size = pos + copied;
2511 if (new_i_size > EXT4_I(inode)->i_disksize) { 2505 if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
2512 if (ext4_da_should_update_i_disksize(page, end)) { 2506 if (ext4_da_should_update_i_disksize(page, end)) {
2513 down_write(&EXT4_I(inode)->i_data_sem); 2507 down_write(&EXT4_I(inode)->i_data_sem);
2514 if (new_i_size > EXT4_I(inode)->i_disksize) { 2508 if (new_i_size > EXT4_I(inode)->i_disksize) {
@@ -2532,16 +2526,6 @@ static int ext4_da_write_end(struct file *file,
2532 } 2526 }
2533 ret2 = generic_write_end(file, mapping, pos, len, copied, 2527 ret2 = generic_write_end(file, mapping, pos, len, copied,
2534 page, fsdata); 2528 page, fsdata);
2535
2536 page_len = PAGE_CACHE_SIZE -
2537 ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
2538
2539 if (page_len > 0) {
2540 ret = ext4_discard_partial_page_buffers_no_lock(handle,
2541 inode, page, pos + copied - 1, page_len,
2542 EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
2543 }
2544
2545 copied = ret2; 2529 copied = ret2;
2546 if (ret2 < 0) 2530 if (ret2 < 0)
2547 ret = ret2; 2531 ret = ret2;
@@ -2781,10 +2765,11 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2781 iocb->private, io_end->inode->i_ino, iocb, offset, 2765 iocb->private, io_end->inode->i_ino, iocb, offset,
2782 size); 2766 size);
2783 2767
2768 iocb->private = NULL;
2769
2784 /* if not aio dio with unwritten extents, just free io and return */ 2770 /* if not aio dio with unwritten extents, just free io and return */
2785 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { 2771 if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
2786 ext4_free_io_end(io_end); 2772 ext4_free_io_end(io_end);
2787 iocb->private = NULL;
2788out: 2773out:
2789 if (is_async) 2774 if (is_async)
2790 aio_complete(iocb, ret, 0); 2775 aio_complete(iocb, ret, 0);
@@ -2807,7 +2792,6 @@ out:
2807 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 2792 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
2808 2793
2809 /* queue the work to convert unwritten extents to written */ 2794 /* queue the work to convert unwritten extents to written */
2810 iocb->private = NULL;
2811 queue_work(wq, &io_end->work); 2795 queue_work(wq, &io_end->work);
2812 2796
2813 /* XXX: probably should move into the real I/O completion handler */ 2797 /* XXX: probably should move into the real I/O completion handler */
@@ -3203,26 +3187,8 @@ int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3203 3187
3204 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); 3188 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3205 3189
3206 if (!page_has_buffers(page)) { 3190 if (!page_has_buffers(page))
3207 /* 3191 create_empty_buffers(page, blocksize, 0);
3208 * If the range to be discarded covers a partial block
3209 * we need to get the page buffers. This is because
3210 * partial blocks cannot be released and the page needs
3211 * to be updated with the contents of the block before
3212 * we write the zeros on top of it.
3213 */
3214 if ((from & (blocksize - 1)) ||
3215 ((from + length) & (blocksize - 1))) {
3216 create_empty_buffers(page, blocksize, 0);
3217 } else {
3218 /*
3219 * If there are no partial blocks,
3220 * there is nothing to update,
3221 * so we can return now
3222 */
3223 return 0;
3224 }
3225 }
3226 3192
3227 /* Find the buffer that contains "offset" */ 3193 /* Find the buffer that contains "offset" */
3228 bh = page_buffers(page); 3194 bh = page_buffers(page);
@@ -3503,7 +3469,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
3503 * transaction, and VFS/VM ensures that ext4_truncate() cannot run 3469 * transaction, and VFS/VM ensures that ext4_truncate() cannot run
3504 * simultaneously on behalf of the same inode. 3470 * simultaneously on behalf of the same inode.
3505 * 3471 *
3506 * As we work through the truncate and commmit bits of it to the journal there 3472 * As we work through the truncate and commit bits of it to the journal there
3507 * is one core, guiding principle: the file's tree must always be consistent on 3473 * is one core, guiding principle: the file's tree must always be consistent on
3508 * disk. We must be able to restart the truncate after a crash. 3474 * disk. We must be able to restart the truncate after a crash.
3509 * 3475 *
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a56796814d6a..d37b3bb2a3b8 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -45,7 +45,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
45 if (get_user(flags, (int __user *) arg)) 45 if (get_user(flags, (int __user *) arg))
46 return -EFAULT; 46 return -EFAULT;
47 47
48 err = mnt_want_write(filp->f_path.mnt); 48 err = mnt_want_write_file(filp);
49 if (err) 49 if (err)
50 return err; 50 return err;
51 51
@@ -134,7 +134,7 @@ flags_err:
134 err = ext4_ext_migrate(inode); 134 err = ext4_ext_migrate(inode);
135flags_out: 135flags_out:
136 mutex_unlock(&inode->i_mutex); 136 mutex_unlock(&inode->i_mutex);
137 mnt_drop_write(filp->f_path.mnt); 137 mnt_drop_write_file(filp);
138 return err; 138 return err;
139 } 139 }
140 case EXT4_IOC_GETVERSION: 140 case EXT4_IOC_GETVERSION:
@@ -150,7 +150,7 @@ flags_out:
150 if (!inode_owner_or_capable(inode)) 150 if (!inode_owner_or_capable(inode))
151 return -EPERM; 151 return -EPERM;
152 152
153 err = mnt_want_write(filp->f_path.mnt); 153 err = mnt_want_write_file(filp);
154 if (err) 154 if (err)
155 return err; 155 return err;
156 if (get_user(generation, (int __user *) arg)) { 156 if (get_user(generation, (int __user *) arg)) {
@@ -171,7 +171,7 @@ flags_out:
171 } 171 }
172 ext4_journal_stop(handle); 172 ext4_journal_stop(handle);
173setversion_out: 173setversion_out:
174 mnt_drop_write(filp->f_path.mnt); 174 mnt_drop_write_file(filp);
175 return err; 175 return err;
176 } 176 }
177 case EXT4_IOC_GROUP_EXTEND: { 177 case EXT4_IOC_GROUP_EXTEND: {
@@ -192,7 +192,7 @@ setversion_out:
192 return -EOPNOTSUPP; 192 return -EOPNOTSUPP;
193 } 193 }
194 194
195 err = mnt_want_write(filp->f_path.mnt); 195 err = mnt_want_write_file(filp);
196 if (err) 196 if (err)
197 return err; 197 return err;
198 198
@@ -204,7 +204,7 @@ setversion_out:
204 } 204 }
205 if (err == 0) 205 if (err == 0)
206 err = err2; 206 err = err2;
207 mnt_drop_write(filp->f_path.mnt); 207 mnt_drop_write_file(filp);
208 ext4_resize_end(sb); 208 ext4_resize_end(sb);
209 209
210 return err; 210 return err;
@@ -240,13 +240,13 @@ setversion_out:
240 return -EOPNOTSUPP; 240 return -EOPNOTSUPP;
241 } 241 }
242 242
243 err = mnt_want_write(filp->f_path.mnt); 243 err = mnt_want_write_file(filp);
244 if (err) 244 if (err)
245 goto mext_out; 245 goto mext_out;
246 246
247 err = ext4_move_extents(filp, donor_filp, me.orig_start, 247 err = ext4_move_extents(filp, donor_filp, me.orig_start,
248 me.donor_start, me.len, &me.moved_len); 248 me.donor_start, me.len, &me.moved_len);
249 mnt_drop_write(filp->f_path.mnt); 249 mnt_drop_write_file(filp);
250 if (me.moved_len > 0) 250 if (me.moved_len > 0)
251 file_remove_suid(donor_filp); 251 file_remove_suid(donor_filp);
252 252
@@ -277,7 +277,7 @@ mext_out:
277 return -EOPNOTSUPP; 277 return -EOPNOTSUPP;
278 } 278 }
279 279
280 err = mnt_want_write(filp->f_path.mnt); 280 err = mnt_want_write_file(filp);
281 if (err) 281 if (err)
282 return err; 282 return err;
283 283
@@ -289,7 +289,7 @@ mext_out:
289 } 289 }
290 if (err == 0) 290 if (err == 0)
291 err = err2; 291 err = err2;
292 mnt_drop_write(filp->f_path.mnt); 292 mnt_drop_write_file(filp);
293 ext4_resize_end(sb); 293 ext4_resize_end(sb);
294 294
295 return err; 295 return err;
@@ -301,7 +301,7 @@ mext_out:
301 if (!inode_owner_or_capable(inode)) 301 if (!inode_owner_or_capable(inode))
302 return -EACCES; 302 return -EACCES;
303 303
304 err = mnt_want_write(filp->f_path.mnt); 304 err = mnt_want_write_file(filp);
305 if (err) 305 if (err)
306 return err; 306 return err;
307 /* 307 /*
@@ -313,7 +313,7 @@ mext_out:
313 mutex_lock(&(inode->i_mutex)); 313 mutex_lock(&(inode->i_mutex));
314 err = ext4_ext_migrate(inode); 314 err = ext4_ext_migrate(inode);
315 mutex_unlock(&(inode->i_mutex)); 315 mutex_unlock(&(inode->i_mutex));
316 mnt_drop_write(filp->f_path.mnt); 316 mnt_drop_write_file(filp);
317 return err; 317 return err;
318 } 318 }
319 319
@@ -323,11 +323,11 @@ mext_out:
323 if (!inode_owner_or_capable(inode)) 323 if (!inode_owner_or_capable(inode))
324 return -EACCES; 324 return -EACCES;
325 325
326 err = mnt_want_write(filp->f_path.mnt); 326 err = mnt_want_write_file(filp);
327 if (err) 327 if (err)
328 return err; 328 return err;
329 err = ext4_alloc_da_blocks(inode); 329 err = ext4_alloc_da_blocks(inode);
330 mnt_drop_write(filp->f_path.mnt); 330 mnt_drop_write_file(filp);
331 return err; 331 return err;
332 } 332 }
333 333
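Every ioctl case above is converted from mnt_want_write(filp->f_path.mnt) to mnt_want_write_file(filp), which takes the struct file directly and pairs with mnt_drop_write_file(). A minimal sketch of the bracketing pattern in a writing ioctl handler; the FOO_IOC_SET_X command and foo_apply() helper are made up for illustration:

	#include <linux/fs.h>
	#include <linux/mount.h>
	#include <linux/mutex.h>
	#include <linux/ioctl.h>
	#include <asm/uaccess.h>

	#define FOO_IOC_SET_X	_IOW('f', 1, int)	/* hypothetical command */

	/* Hypothetical helper: apply the new value to the inode. */
	static int foo_apply(struct inode *inode, int val)
	{
		return 0;
	}

	static long foo_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
	{
		struct inode *inode = filp->f_path.dentry->d_inode;
		int err, val;

		switch (cmd) {
		case FOO_IOC_SET_X:
			if (get_user(val, (int __user *)arg))
				return -EFAULT;

			/* Pin write access on the mount for the duration of the change. */
			err = mnt_want_write_file(filp);
			if (err)
				return err;

			mutex_lock(&inode->i_mutex);
			err = foo_apply(inode, val);
			mutex_unlock(&inode->i_mutex);

			mnt_drop_write_file(filp);	/* always balances the want_write */
			return err;
		default:
			return -ENOTTY;
		}
	}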
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index aa4c782c9dd7..2043f482375d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1736,7 +1736,7 @@ static int ext4_add_nondir(handle_t *handle,
1736 * If the create succeeds, we fill in the inode information 1736 * If the create succeeds, we fill in the inode information
1737 * with d_instantiate(). 1737 * with d_instantiate().
1738 */ 1738 */
1739static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, 1739static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1740 struct nameidata *nd) 1740 struct nameidata *nd)
1741{ 1741{
1742 handle_t *handle; 1742 handle_t *handle;
@@ -1770,7 +1770,7 @@ retry:
1770} 1770}
1771 1771
1772static int ext4_mknod(struct inode *dir, struct dentry *dentry, 1772static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1773 int mode, dev_t rdev) 1773 umode_t mode, dev_t rdev)
1774{ 1774{
1775 handle_t *handle; 1775 handle_t *handle;
1776 struct inode *inode; 1776 struct inode *inode;
@@ -1806,7 +1806,7 @@ retry:
1806 return err; 1806 return err;
1807} 1807}
1808 1808
1809static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1809static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1810{ 1810{
1811 handle_t *handle; 1811 handle_t *handle;
1812 struct inode *inode; 1812 struct inode *inode;
@@ -2315,7 +2315,7 @@ retry:
2315 err = PTR_ERR(handle); 2315 err = PTR_ERR(handle);
2316 goto err_drop_inode; 2316 goto err_drop_inode;
2317 } 2317 }
2318 inc_nlink(inode); 2318 set_nlink(inode, 1);
2319 err = ext4_orphan_del(handle, inode); 2319 err = ext4_orphan_del(handle, inode);
2320 if (err) { 2320 if (err) {
2321 ext4_journal_stop(handle); 2321 ext4_journal_stop(handle);
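The hunk above replaces inc_nlink() with set_nlink(inode, 1); the orphan-list removal just below suggests the inode was deliberately parked with a zero link count, so the code wants to assert the absolute final count rather than bump whatever value happens to be there. A hedged sketch of the distinction (helper name is illustrative):

	#include <linux/fs.h>

	/*
	 * inc_nlink()/drop_nlink() adjust i_nlink relative to its current value;
	 * set_nlink() states the absolute count.  When an inode was intentionally
	 * left at i_nlink == 0 (e.g. while sitting on an orphan list), asserting
	 * the final value is clearer and does not depend on the starting point.
	 */
	static void publish_inode(struct inode *inode)
	{
		set_nlink(inode, 1);		/* the inode now has exactly one link */
		mark_inode_dirty(inode);
	}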
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7ce1d0b19c94..7e106c810c62 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -385,6 +385,18 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
385 385
386 block_end = block_start + blocksize; 386 block_end = block_start + blocksize;
387 if (block_start >= len) { 387 if (block_start >= len) {
388 /*
389 * Comments copied from block_write_full_page_endio:
390 *
391 * The page straddles i_size. It must be zeroed out on
392 * each and every writepage invocation because it may
393 * be mmapped. "A file is mapped in multiples of the
394 * page size. For a file that is not a multiple of
395 * the page size, the remaining memory is zeroed when
396 * mapped, and writes to that region are not written
397 * out to the file."
398 */
399 zero_user_segment(page, block_start, block_end);
388 clear_buffer_dirty(bh); 400 clear_buffer_dirty(bh);
389 set_buffer_uptodate(bh); 401 set_buffer_uptodate(bh);
390 continue; 402 continue;
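The comment block added above explains why the tail of a page that straddles i_size must be re-zeroed on every writepage: the page may be mmapped, and stale bytes written there by user space must not reach the disk. zero_user_segment() clears the byte range [start, end) of a page via a temporary kmap; a small sketch of zeroing everything past EOF on such a page (function name is illustrative):

	#include <linux/fs.h>
	#include <linux/highmem.h>
	#include <linux/pagemap.h>

	/* Zero the part of 'page' that lies beyond i_size before writing it out. */
	static void zero_tail_past_eof(struct page *page, struct inode *inode)
	{
		loff_t i_size = i_size_read(inode);
		pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
		unsigned int offset = i_size & (PAGE_CACHE_SIZE - 1);

		/* Only the page that actually contains EOF needs the treatment. */
		if (page->index == end_index && offset)
			zero_user_segment(page, offset, PAGE_CACHE_SIZE);
	}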
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3858767ec672..64e2529ae9bb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -930,7 +930,6 @@ static int ext4_drop_inode(struct inode *inode)
930static void ext4_i_callback(struct rcu_head *head) 930static void ext4_i_callback(struct rcu_head *head)
931{ 931{
932 struct inode *inode = container_of(head, struct inode, i_rcu); 932 struct inode *inode = container_of(head, struct inode, i_rcu);
933 INIT_LIST_HEAD(&inode->i_dentry);
934 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 933 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
935} 934}
936 935
@@ -1033,11 +1032,11 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
1033 * - it's set to a non-default value OR 1032 * - it's set to a non-default value OR
1034 * - if the per-sb default is different from the global default 1033 * - if the per-sb default is different from the global default
1035 */ 1034 */
1036static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) 1035static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1037{ 1036{
1038 int def_errors; 1037 int def_errors;
1039 unsigned long def_mount_opts; 1038 unsigned long def_mount_opts;
1040 struct super_block *sb = vfs->mnt_sb; 1039 struct super_block *sb = root->d_sb;
1041 struct ext4_sb_info *sbi = EXT4_SB(sb); 1040 struct ext4_sb_info *sbi = EXT4_SB(sb);
1042 struct ext4_super_block *es = sbi->s_es; 1041 struct ext4_super_block *es = sbi->s_es;
1043 1042
@@ -1155,9 +1154,9 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
1155 seq_puts(seq, ",block_validity"); 1154 seq_puts(seq, ",block_validity");
1156 1155
1157 if (!test_opt(sb, INIT_INODE_TABLE)) 1156 if (!test_opt(sb, INIT_INODE_TABLE))
1158 seq_puts(seq, ",noinit_inode_table"); 1157 seq_puts(seq, ",noinit_itable");
1159 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT) 1158 else if (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)
1160 seq_printf(seq, ",init_inode_table=%u", 1159 seq_printf(seq, ",init_itable=%u",
1161 (unsigned) sbi->s_li_wait_mult); 1160 (unsigned) sbi->s_li_wait_mult);
1162 1161
1163 ext4_show_quota_options(seq, sb); 1162 ext4_show_quota_options(seq, sb);
@@ -1333,8 +1332,7 @@ enum {
1333 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, 1332 Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1334 Opt_inode_readahead_blks, Opt_journal_ioprio, 1333 Opt_inode_readahead_blks, Opt_journal_ioprio,
1335 Opt_dioread_nolock, Opt_dioread_lock, 1334 Opt_dioread_nolock, Opt_dioread_lock,
1336 Opt_discard, Opt_nodiscard, 1335 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1337 Opt_init_inode_table, Opt_noinit_inode_table,
1338}; 1336};
1339 1337
1340static const match_table_t tokens = { 1338static const match_table_t tokens = {
@@ -1407,9 +1405,9 @@ static const match_table_t tokens = {
1407 {Opt_dioread_lock, "dioread_lock"}, 1405 {Opt_dioread_lock, "dioread_lock"},
1408 {Opt_discard, "discard"}, 1406 {Opt_discard, "discard"},
1409 {Opt_nodiscard, "nodiscard"}, 1407 {Opt_nodiscard, "nodiscard"},
1410 {Opt_init_inode_table, "init_itable=%u"}, 1408 {Opt_init_itable, "init_itable=%u"},
1411 {Opt_init_inode_table, "init_itable"}, 1409 {Opt_init_itable, "init_itable"},
1412 {Opt_noinit_inode_table, "noinit_itable"}, 1410 {Opt_noinit_itable, "noinit_itable"},
1413 {Opt_err, NULL}, 1411 {Opt_err, NULL},
1414}; 1412};
1415 1413
@@ -1892,7 +1890,7 @@ set_qf_format:
1892 case Opt_dioread_lock: 1890 case Opt_dioread_lock:
1893 clear_opt(sb, DIOREAD_NOLOCK); 1891 clear_opt(sb, DIOREAD_NOLOCK);
1894 break; 1892 break;
1895 case Opt_init_inode_table: 1893 case Opt_init_itable:
1896 set_opt(sb, INIT_INODE_TABLE); 1894 set_opt(sb, INIT_INODE_TABLE);
1897 if (args[0].from) { 1895 if (args[0].from) {
1898 if (match_int(&args[0], &option)) 1896 if (match_int(&args[0], &option))
@@ -1903,7 +1901,7 @@ set_qf_format:
1903 return 0; 1901 return 0;
1904 sbi->s_li_wait_mult = option; 1902 sbi->s_li_wait_mult = option;
1905 break; 1903 break;
1906 case Opt_noinit_inode_table: 1904 case Opt_noinit_itable:
1907 clear_opt(sb, INIT_INODE_TABLE); 1905 clear_opt(sb, INIT_INODE_TABLE);
1908 break; 1906 break;
1909 default: 1907 default:
@@ -2884,8 +2882,7 @@ cont_thread:
2884 } 2882 }
2885 mutex_unlock(&eli->li_list_mtx); 2883 mutex_unlock(&eli->li_list_mtx);
2886 2884
2887 if (freezing(current)) 2885 try_to_freeze();
2888 refrigerator();
2889 2886
2890 cur = jiffies; 2887 cur = jiffies;
2891 if ((time_after_eq(cur, next_wakeup)) || 2888 if ((time_after_eq(cur, next_wakeup)) ||
@@ -4783,7 +4780,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
4783 return -EINVAL; 4780 return -EINVAL;
4784 4781
4785 /* Quotafile not on the same filesystem? */ 4782 /* Quotafile not on the same filesystem? */
4786 if (path->mnt->mnt_sb != sb) 4783 if (path->dentry->d_sb != sb)
4787 return -EXDEV; 4784 return -EXDEV;
4788 /* Journaling quota? */ 4785 /* Journaling quota? */
4789 if (EXT4_SB(sb)->s_qf_names[type]) { 4786 if (EXT4_SB(sb)->s_qf_names[type]) {
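ext4_show_options() is switched from taking a struct vfsmount * to a struct dentry *root, matching the new ->show_options prototype; the superblock is now reached via root->d_sb instead of mnt_sb. The same conversion shows up below for fat and fuse. A minimal sketch under the new prototype, with a hypothetical foo_sb_info and options:

	#include <linux/fs.h>
	#include <linux/seq_file.h>

	/* Hypothetical per-sb info, for illustration only. */
	struct foo_sb_info {
		int		noatime_opt;
		unsigned int	commit_interval;
	};

	static int foo_show_options(struct seq_file *seq, struct dentry *root)
	{
		struct super_block *sb = root->d_sb;		/* was vfs->mnt_sb */
		struct foo_sb_info *sbi = sb->s_fs_info;

		if (sbi->noatime_opt)
			seq_puts(seq, ",noatime");
		if (sbi->commit_interval)
			seq_printf(seq, ",commit=%u", sbi->commit_interval);
		return 0;
	}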
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 1510a4d51990..66994f316e18 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -141,7 +141,7 @@ static inline struct msdos_inode_info *MSDOS_I(struct inode *inode)
141static inline int fat_mode_can_hold_ro(struct inode *inode) 141static inline int fat_mode_can_hold_ro(struct inode *inode)
142{ 142{
143 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); 143 struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
144 mode_t mask; 144 umode_t mask;
145 145
146 if (S_ISDIR(inode->i_mode)) { 146 if (S_ISDIR(inode->i_mode)) {
147 if (!sbi->options.rodir) 147 if (!sbi->options.rodir)
@@ -156,8 +156,8 @@ static inline int fat_mode_can_hold_ro(struct inode *inode)
156} 156}
157 157
158/* Convert attribute bits and a mask to the UNIX mode. */ 158/* Convert attribute bits and a mask to the UNIX mode. */
159static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, 159static inline umode_t fat_make_mode(struct msdos_sb_info *sbi,
160 u8 attrs, mode_t mode) 160 u8 attrs, umode_t mode)
161{ 161{
162 if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir)) 162 if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir))
163 mode &= ~S_IWUGO; 163 mode &= ~S_IWUGO;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c118acf16e43..a71fe3715ee8 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -44,7 +44,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
44 goto out; 44 goto out;
45 45
46 mutex_lock(&inode->i_mutex); 46 mutex_lock(&inode->i_mutex);
47 err = mnt_want_write(file->f_path.mnt); 47 err = mnt_want_write_file(file);
48 if (err) 48 if (err)
49 goto out_unlock_inode; 49 goto out_unlock_inode;
50 50
@@ -108,7 +108,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
108 fat_save_attrs(inode, attr); 108 fat_save_attrs(inode, attr);
109 mark_inode_dirty(inode); 109 mark_inode_dirty(inode);
110out_drop_write: 110out_drop_write:
111 mnt_drop_write(file->f_path.mnt); 111 mnt_drop_write_file(file);
112out_unlock_inode: 112out_unlock_inode:
113 mutex_unlock(&inode->i_mutex); 113 mutex_unlock(&inode->i_mutex);
114out: 114out:
@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(fat_getattr);
314static int fat_sanitize_mode(const struct msdos_sb_info *sbi, 314static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
315 struct inode *inode, umode_t *mode_ptr) 315 struct inode *inode, umode_t *mode_ptr)
316{ 316{
317 mode_t mask, perm; 317 umode_t mask, perm;
318 318
319 /* 319 /*
320 * Note, the basic check is already done by a caller of 320 * Note, the basic check is already done by a caller of
@@ -351,7 +351,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
351 351
352static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) 352static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
353{ 353{
354 mode_t allow_utime = sbi->options.allow_utime; 354 umode_t allow_utime = sbi->options.allow_utime;
355 355
356 if (current_fsuid() != inode->i_uid) { 356 if (current_fsuid() != inode->i_uid) {
357 if (in_group_p(inode->i_gid)) 357 if (in_group_p(inode->i_gid))
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 808cac7edcfb..3ab841054d53 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -518,7 +518,6 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
518static void fat_i_callback(struct rcu_head *head) 518static void fat_i_callback(struct rcu_head *head)
519{ 519{
520 struct inode *inode = container_of(head, struct inode, i_rcu); 520 struct inode *inode = container_of(head, struct inode, i_rcu);
521 INIT_LIST_HEAD(&inode->i_dentry);
522 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); 521 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
523} 522}
524 523
@@ -672,7 +671,7 @@ int fat_sync_inode(struct inode *inode)
672 671
673EXPORT_SYMBOL_GPL(fat_sync_inode); 672EXPORT_SYMBOL_GPL(fat_sync_inode);
674 673
675static int fat_show_options(struct seq_file *m, struct vfsmount *mnt); 674static int fat_show_options(struct seq_file *m, struct dentry *root);
676static const struct super_operations fat_sops = { 675static const struct super_operations fat_sops = {
677 .alloc_inode = fat_alloc_inode, 676 .alloc_inode = fat_alloc_inode,
678 .destroy_inode = fat_destroy_inode, 677 .destroy_inode = fat_destroy_inode,
@@ -811,9 +810,9 @@ static const struct export_operations fat_export_ops = {
811 .get_parent = fat_get_parent, 810 .get_parent = fat_get_parent,
812}; 811};
813 812
814static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) 813static int fat_show_options(struct seq_file *m, struct dentry *root)
815{ 814{
816 struct msdos_sb_info *sbi = MSDOS_SB(mnt->mnt_sb); 815 struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
817 struct fat_mount_options *opts = &sbi->options; 816 struct fat_mount_options *opts = &sbi->options;
818 int isvfat = opts->isvfat; 817 int isvfat = opts->isvfat;
819 818
@@ -898,7 +897,7 @@ enum {
898 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 897 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
899 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 898 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
900 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 899 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
901 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont, 900 Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
902 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err, 901 Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
903}; 902};
904 903
@@ -928,17 +927,17 @@ static const match_table_t fat_tokens = {
928 {Opt_err_panic, "errors=panic"}, 927 {Opt_err_panic, "errors=panic"},
929 {Opt_err_ro, "errors=remount-ro"}, 928 {Opt_err_ro, "errors=remount-ro"},
930 {Opt_discard, "discard"}, 929 {Opt_discard, "discard"},
931 {Opt_obsolate, "conv=binary"}, 930 {Opt_obsolete, "conv=binary"},
932 {Opt_obsolate, "conv=text"}, 931 {Opt_obsolete, "conv=text"},
933 {Opt_obsolate, "conv=auto"}, 932 {Opt_obsolete, "conv=auto"},
934 {Opt_obsolate, "conv=b"}, 933 {Opt_obsolete, "conv=b"},
935 {Opt_obsolate, "conv=t"}, 934 {Opt_obsolete, "conv=t"},
936 {Opt_obsolate, "conv=a"}, 935 {Opt_obsolete, "conv=a"},
937 {Opt_obsolate, "fat=%u"}, 936 {Opt_obsolete, "fat=%u"},
938 {Opt_obsolate, "blocksize=%u"}, 937 {Opt_obsolete, "blocksize=%u"},
939 {Opt_obsolate, "cvf_format=%20s"}, 938 {Opt_obsolete, "cvf_format=%20s"},
940 {Opt_obsolate, "cvf_options=%100s"}, 939 {Opt_obsolete, "cvf_options=%100s"},
941 {Opt_obsolate, "posix"}, 940 {Opt_obsolete, "posix"},
942 {Opt_err, NULL}, 941 {Opt_err, NULL},
943}; 942};
944static const match_table_t msdos_tokens = { 943static const match_table_t msdos_tokens = {
@@ -1170,7 +1169,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
1170 break; 1169 break;
1171 1170
1172 /* obsolete mount options */ 1171 /* obsolete mount options */
1173 case Opt_obsolate: 1172 case Opt_obsolete:
1174 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, " 1173 fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
1175 "not supported now", p); 1174 "not supported now", p);
1176 break; 1175 break;
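The fat hunks above rename the misspelled Opt_obsolate token to Opt_obsolete; the table itself is an ordinary match_table_t consumed by match_token(). For reference, the kind of parsing loop such a table feeds looks roughly like this (the option names here are illustrative, not fat's):

	#include <linux/parser.h>
	#include <linux/string.h>
	#include <linux/kernel.h>

	enum { Opt_quiet, Opt_blksize, Opt_err };

	static const match_table_t tokens = {
		{Opt_quiet,	"quiet"},
		{Opt_blksize,	"blksize=%u"},
		{Opt_err,	NULL}
	};

	static int parse_options(char *options, int *quiet, int *blksize)
	{
		substring_t args[MAX_OPT_ARGS];
		char *p;

		while ((p = strsep(&options, ",")) != NULL) {
			int token, option;

			if (!*p)
				continue;
			token = match_token(p, tokens, args);	/* fills args[] for %u etc. */
			switch (token) {
			case Opt_quiet:
				*quiet = 1;
				break;
			case Opt_blksize:
				if (match_int(&args[0], &option))
					return -EINVAL;
				*blksize = option;
				break;
			default:
				return -EINVAL;
			}
		}
		return 0;
	}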
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 216b419f30e2..c5938c9084b9 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -264,7 +264,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
264} 264}
265 265
266/***** Create a file */ 266/***** Create a file */
267static int msdos_create(struct inode *dir, struct dentry *dentry, int mode, 267static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
268 struct nameidata *nd) 268 struct nameidata *nd)
269{ 269{
270 struct super_block *sb = dir->i_sb; 270 struct super_block *sb = dir->i_sb;
@@ -346,7 +346,7 @@ out:
346} 346}
347 347
348/***** Make a directory */ 348/***** Make a directory */
349static int msdos_mkdir(struct inode *dir, struct dentry *dentry, int mode) 349static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
350{ 350{
351 struct super_block *sb = dir->i_sb; 351 struct super_block *sb = dir->i_sb;
352 struct fat_slot_info sinfo; 352 struct fat_slot_info sinfo;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index c25cf151b84b..a81eb2367d39 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -782,7 +782,7 @@ error:
782 return ERR_PTR(err); 782 return ERR_PTR(err);
783} 783}
784 784
785static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, 785static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
786 struct nameidata *nd) 786 struct nameidata *nd)
787{ 787{
788 struct super_block *sb = dir->i_sb; 788 struct super_block *sb = dir->i_sb;
@@ -871,7 +871,7 @@ out:
871 return err; 871 return err;
872} 872}
873 873
874static int vfat_mkdir(struct inode *dir, struct dentry *dentry, int mode) 874static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
875{ 875{
876 struct super_block *sb = dir->i_sb; 876 struct super_block *sb = dir->i_sb;
877 struct inode *inode; 877 struct inode *inode;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6b088641f5bf..a48e4a139be1 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -10,6 +10,7 @@
10#include <linux/personality.h> 10#include <linux/personality.h>
11#include <asm/uaccess.h> 11#include <asm/uaccess.h>
12#include "internal.h" 12#include "internal.h"
13#include "mount.h"
13 14
14static long do_sys_name_to_handle(struct path *path, 15static long do_sys_name_to_handle(struct path *path,
15 struct file_handle __user *ufh, 16 struct file_handle __user *ufh,
@@ -24,8 +25,8 @@ static long do_sys_name_to_handle(struct path *path,
24 * We need to make sure whether the file system 25 * We need to make sure whether the file system
25 * supports decoding of the file handle 26 * supports decoding of the file handle
26 */ 27 */
27 if (!path->mnt->mnt_sb->s_export_op || 28 if (!path->dentry->d_sb->s_export_op ||
28 !path->mnt->mnt_sb->s_export_op->fh_to_dentry) 29 !path->dentry->d_sb->s_export_op->fh_to_dentry)
29 return -EOPNOTSUPP; 30 return -EOPNOTSUPP;
30 31
31 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) 32 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
@@ -66,7 +67,8 @@ static long do_sys_name_to_handle(struct path *path,
66 } else 67 } else
67 retval = 0; 68 retval = 0;
68 /* copy the mount id */ 69 /* copy the mount id */
69 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) || 70 if (copy_to_user(mnt_id, &real_mount(path->mnt)->mnt_id,
71 sizeof(*mnt_id)) ||
70 copy_to_user(ufh, handle, 72 copy_to_user(ufh, handle,
71 sizeof(struct file_handle) + handle_bytes)) 73 sizeof(struct file_handle) + handle_bytes))
72 retval = -EFAULT; 74 retval = -EFAULT;
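mnt_id has moved off the public struct vfsmount into the fs/-private struct mount (hence the new #include "mount.h"), and callers now reach it through real_mount(). That helper is essentially a container_of() around the embedded vfsmount; the following is only a conceptual sketch with stand-in names, not the real fs/mount.h layout:

	#include <linux/kernel.h>	/* container_of() */
	#include <linux/mount.h>

	/* Stand-in for the internal bookkeeping wrapped around struct vfsmount. */
	struct mount_sketch {
		int		mnt_id;		/* now private to fs/ */
		struct vfsmount	mnt;		/* what the rest of the kernel sees */
	};

	static inline struct mount_sketch *real_mount_sketch(struct vfsmount *mnt)
	{
		return container_of(mnt, struct mount_sketch, mnt);
	}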
diff --git a/fs/file_table.c b/fs/file_table.c
index c322794f7360..20002e39754d 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -474,29 +474,6 @@ void file_sb_list_del(struct file *file)
474 474
475#endif 475#endif
476 476
477int fs_may_remount_ro(struct super_block *sb)
478{
479 struct file *file;
480 /* Check that no files are currently opened for writing. */
481 lg_global_lock(files_lglock);
482 do_file_list_for_each_entry(sb, file) {
483 struct inode *inode = file->f_path.dentry->d_inode;
484
485 /* File with pending delete? */
486 if (inode->i_nlink == 0)
487 goto too_bad;
488
489 /* Writeable file? */
490 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
491 goto too_bad;
492 } while_file_list_for_each_entry;
493 lg_global_unlock(files_lglock);
494 return 1; /* Tis' cool bro. */
495too_bad:
496 lg_global_unlock(files_lglock);
497 return 0;
498}
499
500/** 477/**
501 * mark_files_ro - mark all files read-only 478 * mark_files_ro - mark all files read-only
502 * @sb: superblock in question 479 * @sb: superblock in question
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 0845f84f2a5f..96f24286667a 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -74,7 +74,6 @@ int register_filesystem(struct file_system_type * fs)
74 BUG_ON(strchr(fs->name, '.')); 74 BUG_ON(strchr(fs->name, '.'));
75 if (fs->next) 75 if (fs->next)
76 return -EBUSY; 76 return -EBUSY;
77 INIT_LIST_HEAD(&fs->fs_supers);
78 write_lock(&file_systems_lock); 77 write_lock(&file_systems_lock);
79 p = find_filesystem(fs->name, strlen(fs->name)); 78 p = find_filesystem(fs->name, strlen(fs->name));
80 if (*p) 79 if (*p)
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 7b2af5abe2fa..cf9ef918a2a9 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -187,10 +187,10 @@ vxfs_stiget(struct super_block *sbp, ino_t ino)
187 * vxfs_transmod returns a Linux mode_t for a given 187 * vxfs_transmod returns a Linux mode_t for a given
188 * VxFS inode structure. 188 * VxFS inode structure.
189 */ 189 */
190static __inline__ mode_t 190static __inline__ umode_t
191vxfs_transmod(struct vxfs_inode_info *vip) 191vxfs_transmod(struct vxfs_inode_info *vip)
192{ 192{
193 mode_t ret = vip->vii_mode & ~VXFS_TYPE_MASK; 193 umode_t ret = vip->vii_mode & ~VXFS_TYPE_MASK;
194 194
195 if (VXFS_ISFIFO(vip)) 195 if (VXFS_ISFIFO(vip))
196 ret |= S_IFIFO; 196 ret |= S_IFIFO;
@@ -340,7 +340,6 @@ vxfs_iget(struct super_block *sbp, ino_t ino)
340static void vxfs_i_callback(struct rcu_head *head) 340static void vxfs_i_callback(struct rcu_head *head)
341{ 341{
342 struct inode *inode = container_of(head, struct inode, i_rcu); 342 struct inode *inode = container_of(head, struct inode, i_rcu);
343 INIT_LIST_HEAD(&inode->i_dentry);
344 kmem_cache_free(vxfs_inode_cachep, inode->i_private); 343 kmem_cache_free(vxfs_inode_cachep, inode->i_private);
345} 344}
346 345
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 73c3992b2bb4..e2951506434d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -25,7 +25,6 @@
25#include <linux/writeback.h> 25#include <linux/writeback.h>
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/buffer_head.h>
29#include <linux/tracepoint.h> 28#include <linux/tracepoint.h>
30#include "internal.h" 29#include "internal.h"
31 30
@@ -47,17 +46,6 @@ struct wb_writeback_work {
47 struct completion *done; /* set if the caller waits */ 46 struct completion *done; /* set if the caller waits */
48}; 47};
49 48
50const char *wb_reason_name[] = {
51 [WB_REASON_BACKGROUND] = "background",
52 [WB_REASON_TRY_TO_FREE_PAGES] = "try_to_free_pages",
53 [WB_REASON_SYNC] = "sync",
54 [WB_REASON_PERIODIC] = "periodic",
55 [WB_REASON_LAPTOP_TIMER] = "laptop_timer",
56 [WB_REASON_FREE_MORE_MEM] = "free_more_memory",
57 [WB_REASON_FS_FREE_SPACE] = "fs_free_space",
58 [WB_REASON_FORKER_THREAD] = "forker_thread"
59};
60
61/* 49/*
62 * Include the creation of the trace points after defining the 50 * Include the creation of the trace points after defining the
63 * wb_writeback_work structure so that the definition remains local to this 51 * wb_writeback_work structure so that the definition remains local to this
@@ -156,6 +144,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
156 * bdi_start_writeback - start writeback 144 * bdi_start_writeback - start writeback
157 * @bdi: the backing device to write from 145 * @bdi: the backing device to write from
158 * @nr_pages: the number of pages to write 146 * @nr_pages: the number of pages to write
147 * @reason: reason why some writeback work was initiated
159 * 148 *
160 * Description: 149 * Description:
161 * This does WB_SYNC_NONE opportunistic writeback. The IO is only 150 * This does WB_SYNC_NONE opportunistic writeback. The IO is only
@@ -947,7 +936,7 @@ int bdi_writeback_thread(void *data)
947 936
948 trace_writeback_thread_start(bdi); 937 trace_writeback_thread_start(bdi);
949 938
950 while (!kthread_should_stop()) { 939 while (!kthread_freezable_should_stop(NULL)) {
951 /* 940 /*
952 * Remove own delayed wake-up timer, since we are already awake 941 * Remove own delayed wake-up timer, since we are already awake
953 * and we'll take care of the periodic write-back. 942 * and we'll take care of the periodic write-back.
@@ -977,8 +966,6 @@ int bdi_writeback_thread(void *data)
977 */ 966 */
978 schedule(); 967 schedule();
979 } 968 }
980
981 try_to_freeze();
982 } 969 }
983 970
984 /* Flush any work that raced with us exiting */ 971 /* Flush any work that raced with us exiting */
@@ -1223,6 +1210,7 @@ static void wait_sb_inodes(struct super_block *sb)
1223 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block 1210 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
1224 * @sb: the superblock 1211 * @sb: the superblock
1225 * @nr: the number of pages to write 1212 * @nr: the number of pages to write
1213 * @reason: reason why some writeback work was initiated
1226 * 1214 *
1227 * Start writeback on some inodes on this super_block. No guarantees are made 1215 * Start writeback on some inodes on this super_block. No guarantees are made
1228 * on how many (if any) will be written, and this function does not wait 1216 * on how many (if any) will be written, and this function does not wait
@@ -1251,6 +1239,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_nr);
1251/** 1239/**
1252 * writeback_inodes_sb - writeback dirty inodes from given super_block 1240 * writeback_inodes_sb - writeback dirty inodes from given super_block
1253 * @sb: the superblock 1241 * @sb: the superblock
1242 * @reason: reason why some writeback work was initiated
1254 * 1243 *
1255 * Start writeback on some inodes on this super_block. No guarantees are made 1244 * Start writeback on some inodes on this super_block. No guarantees are made
1256 * on how many (if any) will be written, and this function does not wait 1245 * on how many (if any) will be written, and this function does not wait
@@ -1265,6 +1254,7 @@ EXPORT_SYMBOL(writeback_inodes_sb);
1265/** 1254/**
1266 * writeback_inodes_sb_if_idle - start writeback if none underway 1255 * writeback_inodes_sb_if_idle - start writeback if none underway
1267 * @sb: the superblock 1256 * @sb: the superblock
1257 * @reason: reason why some writeback work was initiated
1268 * 1258 *
1269 * Invoke writeback_inodes_sb if no writeback is currently underway. 1259 * Invoke writeback_inodes_sb if no writeback is currently underway.
1270 * Returns 1 if writeback was started, 0 if not. 1260 * Returns 1 if writeback was started, 0 if not.
@@ -1285,6 +1275,7 @@ EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1285 * writeback_inodes_sb_if_idle - start writeback if none underway 1275 * writeback_inodes_sb_if_idle - start writeback if none underway
1286 * @sb: the superblock 1276 * @sb: the superblock
1287 * @nr: the number of pages to write 1277 * @nr: the number of pages to write
1278 * @reason: reason why some writeback work was initiated
1288 * 1279 *
1289 * Invoke writeback_inodes_sb if no writeback is currently underway. 1280 * Invoke writeback_inodes_sb if no writeback is currently underway.
1290 * Returns 1 if writeback was started, 0 if not. 1281 * Returns 1 if writeback was started, 0 if not.
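The writeback thread above drops its open-coded try_to_freeze() call and loops on kthread_freezable_should_stop() instead, which handles the freeze itself and only returns true when the thread should exit. A sketch of the general shape of a freezer-aware kthread under the new helper; the per-iteration work is a placeholder:

	#include <linux/kthread.h>
	#include <linux/freezer.h>
	#include <linux/sched.h>

	static int my_flusher_thread(void *data)
	{
		set_freezable();	/* opt in to the freezer */

		while (!kthread_freezable_should_stop(NULL)) {
			/*
			 * kthread_freezable_should_stop() already parked us in
			 * the refrigerator if a freeze was requested, so the
			 * loop body only contains the real work.
			 */

			/* ... do one round of background work here ... */

			schedule_timeout_interruptible(HZ);
		}
		return 0;
	}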
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 5cb8614508c3..2aaf3eaaf13d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1512,7 +1512,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1512 else if (outarg->offset + num > file_size) 1512 else if (outarg->offset + num > file_size)
1513 num = file_size - outarg->offset; 1513 num = file_size - outarg->offset;
1514 1514
1515 while (num) { 1515 while (num && req->num_pages < FUSE_MAX_PAGES_PER_REQ) {
1516 struct page *page; 1516 struct page *page;
1517 unsigned int this_num; 1517 unsigned int this_num;
1518 1518
@@ -1526,6 +1526,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
1526 1526
1527 num -= this_num; 1527 num -= this_num;
1528 total_len += this_num; 1528 total_len += this_num;
1529 index++;
1529 } 1530 }
1530 req->misc.retrieve_in.offset = outarg->offset; 1531 req->misc.retrieve_in.offset = outarg->offset;
1531 req->misc.retrieve_in.size = total_len; 1532 req->misc.retrieve_in.size = total_len;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9f63e493a9b6..5ddd6ea8f839 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -369,8 +369,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
369 * If the filesystem doesn't support this, then fall back to separate 369 * If the filesystem doesn't support this, then fall back to separate
370 * 'mknod' + 'open' requests. 370 * 'mknod' + 'open' requests.
371 */ 371 */
372static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, 372static int fuse_create_open(struct inode *dir, struct dentry *entry,
373 struct nameidata *nd) 373 umode_t mode, struct nameidata *nd)
374{ 374{
375 int err; 375 int err;
376 struct inode *inode; 376 struct inode *inode;
@@ -480,7 +480,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
480 */ 480 */
481static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req, 481static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
482 struct inode *dir, struct dentry *entry, 482 struct inode *dir, struct dentry *entry,
483 int mode) 483 umode_t mode)
484{ 484{
485 struct fuse_entry_out outarg; 485 struct fuse_entry_out outarg;
486 struct inode *inode; 486 struct inode *inode;
@@ -547,7 +547,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
547 return err; 547 return err;
548} 548}
549 549
550static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, 550static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
551 dev_t rdev) 551 dev_t rdev)
552{ 552{
553 struct fuse_mknod_in inarg; 553 struct fuse_mknod_in inarg;
@@ -573,7 +573,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
573 return create_new_entry(fc, req, dir, entry, mode); 573 return create_new_entry(fc, req, dir, entry, mode);
574} 574}
575 575
576static int fuse_create(struct inode *dir, struct dentry *entry, int mode, 576static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
577 struct nameidata *nd) 577 struct nameidata *nd)
578{ 578{
579 if (nd) { 579 if (nd) {
@@ -585,7 +585,7 @@ static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
585 return fuse_mknod(dir, entry, mode, 0); 585 return fuse_mknod(dir, entry, mode, 0);
586} 586}
587 587
588static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) 588static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
589{ 589{
590 struct fuse_mkdir_in inarg; 590 struct fuse_mkdir_in inarg;
591 struct fuse_conn *fc = get_fuse_conn(dir); 591 struct fuse_conn *fc = get_fuse_conn(dir);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 594f07a81c28..0c84100acd44 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1556,7 +1556,7 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1556 struct inode *inode = file->f_path.dentry->d_inode; 1556 struct inode *inode = file->f_path.dentry->d_inode;
1557 1557
1558 mutex_lock(&inode->i_mutex); 1558 mutex_lock(&inode->i_mutex);
1559 if (origin != SEEK_CUR || origin != SEEK_SET) { 1559 if (origin != SEEK_CUR && origin != SEEK_SET) {
1560 retval = fuse_update_attributes(inode, NULL, file, NULL); 1560 retval = fuse_update_attributes(inode, NULL, file, NULL);
1561 if (retval) 1561 if (retval)
1562 goto exit; 1562 goto exit;
@@ -1567,6 +1567,10 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1567 offset += i_size_read(inode); 1567 offset += i_size_read(inode);
1568 break; 1568 break;
1569 case SEEK_CUR: 1569 case SEEK_CUR:
1570 if (offset == 0) {
1571 retval = file->f_pos;
1572 goto exit;
1573 }
1570 offset += file->f_pos; 1574 offset += file->f_pos;
1571 break; 1575 break;
1572 case SEEK_DATA: 1576 case SEEK_DATA:
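The first fuse_file_llseek() hunk fixes a classic boolean slip: origin != SEEK_CUR || origin != SEEK_SET is true for every value of origin (no single value equals both constants), so the attribute refresh ran even for the cheap SEEK_SET/SEEK_CUR cases. With && the refresh only happens for seek modes that actually need a fresh i_size; the second hunk adds the SEEK_CUR with offset == 0 shortcut, which just reports the current position. A tiny sketch of the corrected predicate:

	#include <linux/fs.h>		/* SEEK_SET, SEEK_CUR, SEEK_END, ... */
	#include <linux/types.h>

	/*
	 * By De Morgan, !(origin == SEEK_CUR || origin == SEEK_SET) is
	 * (origin != SEEK_CUR && origin != SEEK_SET).  Writing '||' between
	 * the negated comparisons instead yields a condition no value can
	 * falsify, which was the original bug.
	 */
	static bool needs_attr_refresh(int origin)
	{
		return origin != SEEK_CUR && origin != SEEK_SET;
	}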
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index cf6db0a93219..1964da0257d9 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -80,7 +80,7 @@ struct fuse_inode {
80 80
81 /** The sticky bit in inode->i_mode may have been removed, so 81 /** The sticky bit in inode->i_mode may have been removed, so
82 preserve the original mode */ 82 preserve the original mode */
83 mode_t orig_i_mode; 83 umode_t orig_i_mode;
84 84
85 /** Version of last attribute change */ 85 /** Version of last attribute change */
86 u64 attr_version; 86 u64 attr_version;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3e6d72756479..64cf8d07393e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -107,7 +107,6 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
107static void fuse_i_callback(struct rcu_head *head) 107static void fuse_i_callback(struct rcu_head *head)
108{ 108{
109 struct inode *inode = container_of(head, struct inode, i_rcu); 109 struct inode *inode = container_of(head, struct inode, i_rcu);
110 INIT_LIST_HEAD(&inode->i_dentry);
111 kmem_cache_free(fuse_inode_cachep, inode); 110 kmem_cache_free(fuse_inode_cachep, inode);
112} 111}
113 112
@@ -498,9 +497,10 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
498 return 1; 497 return 1;
499} 498}
500 499
501static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt) 500static int fuse_show_options(struct seq_file *m, struct dentry *root)
502{ 501{
503 struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb); 502 struct super_block *sb = root->d_sb;
503 struct fuse_conn *fc = get_fuse_conn_super(sb);
504 504
505 seq_printf(m, ",user_id=%u", fc->user_id); 505 seq_printf(m, ",user_id=%u", fc->user_id);
506 seq_printf(m, ",group_id=%u", fc->group_id); 506 seq_printf(m, ",group_id=%u", fc->group_id);
@@ -510,9 +510,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
510 seq_puts(m, ",allow_other"); 510 seq_puts(m, ",allow_other");
511 if (fc->max_read != ~0) 511 if (fc->max_read != ~0)
512 seq_printf(m, ",max_read=%u", fc->max_read); 512 seq_printf(m, ",max_read=%u", fc->max_read);
513 if (mnt->mnt_sb->s_bdev && 513 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
514 mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 514 seq_printf(m, ",blksize=%lu", sb->s_blocksize);
515 seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize);
516 return 0; 515 return 0;
517} 516}
518 517
@@ -1138,28 +1137,28 @@ static int __init fuse_fs_init(void)
1138{ 1137{
1139 int err; 1138 int err;
1140 1139
1141 err = register_filesystem(&fuse_fs_type);
1142 if (err)
1143 goto out;
1144
1145 err = register_fuseblk();
1146 if (err)
1147 goto out_unreg;
1148
1149 fuse_inode_cachep = kmem_cache_create("fuse_inode", 1140 fuse_inode_cachep = kmem_cache_create("fuse_inode",
1150 sizeof(struct fuse_inode), 1141 sizeof(struct fuse_inode),
1151 0, SLAB_HWCACHE_ALIGN, 1142 0, SLAB_HWCACHE_ALIGN,
1152 fuse_inode_init_once); 1143 fuse_inode_init_once);
1153 err = -ENOMEM; 1144 err = -ENOMEM;
1154 if (!fuse_inode_cachep) 1145 if (!fuse_inode_cachep)
1155 goto out_unreg2; 1146 goto out;
1147
1148 err = register_fuseblk();
1149 if (err)
1150 goto out2;
1151
1152 err = register_filesystem(&fuse_fs_type);
1153 if (err)
1154 goto out3;
1156 1155
1157 return 0; 1156 return 0;
1158 1157
1159 out_unreg2: 1158 out3:
1160 unregister_fuseblk(); 1159 unregister_fuseblk();
1161 out_unreg: 1160 out2:
1162 unregister_filesystem(&fuse_fs_type); 1161 kmem_cache_destroy(fuse_inode_cachep);
1163 out: 1162 out:
1164 return err; 1163 return err;
1165} 1164}
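The reordered fuse_fs_init() above now creates the inode cache first and registers the filesystem types last, so a racing mount can never see a registered filesystem whose kmem_cache does not exist yet; the error labels unwind in exactly the reverse order. The pattern, reduced to a skeleton with placeholder names (mount/kill_sb callbacks omitted):

	#include <linux/module.h>
	#include <linux/fs.h>
	#include <linux/slab.h>

	static struct kmem_cache *foo_inode_cachep;

	static struct file_system_type foo_fs_type = {
		.owner	= THIS_MODULE,
		.name	= "foo",
		/* .mount and .kill_sb left out of this sketch */
	};

	static int __init foo_fs_init(void)
	{
		int err;

		foo_inode_cachep = kmem_cache_create("foo_inode",
						     sizeof(struct inode), 0,
						     SLAB_HWCACHE_ALIGN, NULL);
		if (!foo_inode_cachep)
			return -ENOMEM;

		/* Only advertise the filesystem once everything it needs exists. */
		err = register_filesystem(&foo_fs_type);
		if (err)
			goto out_cache;

		return 0;

	out_cache:
		kmem_cache_destroy(foo_inode_cachep);
		return err;
	}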
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 65978d7885c8..230eb0f005b6 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -38,8 +38,9 @@ static const char *gfs2_acl_name(int type)
38 return NULL; 38 return NULL;
39} 39}
40 40
41static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) 41struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
42{ 42{
43 struct gfs2_inode *ip = GFS2_I(inode);
43 struct posix_acl *acl; 44 struct posix_acl *acl;
44 const char *name; 45 const char *name;
45 char *data; 46 char *data;
@@ -67,11 +68,6 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
67 return acl; 68 return acl;
68} 69}
69 70
70struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
71{
72 return gfs2_acl_get(GFS2_I(inode), type);
73}
74
75static int gfs2_set_mode(struct inode *inode, umode_t mode) 71static int gfs2_set_mode(struct inode *inode, umode_t mode)
76{ 72{
77 int error = 0; 73 int error = 0;
@@ -125,7 +121,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
125 if (S_ISLNK(inode->i_mode)) 121 if (S_ISLNK(inode->i_mode))
126 return 0; 122 return 0;
127 123
128 acl = gfs2_acl_get(dip, ACL_TYPE_DEFAULT); 124 acl = gfs2_get_acl(&dip->i_inode, ACL_TYPE_DEFAULT);
129 if (IS_ERR(acl)) 125 if (IS_ERR(acl))
130 return PTR_ERR(acl); 126 return PTR_ERR(acl);
131 if (!acl) { 127 if (!acl) {
@@ -166,7 +162,7 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
166 unsigned int len; 162 unsigned int len;
167 int error; 163 int error;
168 164
169 acl = gfs2_acl_get(ip, ACL_TYPE_ACCESS); 165 acl = gfs2_get_acl(&ip->i_inode, ACL_TYPE_ACCESS);
170 if (IS_ERR(acl)) 166 if (IS_ERR(acl))
171 return PTR_ERR(acl); 167 return PTR_ERR(acl);
172 if (!acl) 168 if (!acl)
@@ -216,7 +212,7 @@ static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
216 if (type < 0) 212 if (type < 0)
217 return type; 213 return type;
218 214
219 acl = gfs2_acl_get(GFS2_I(inode), type); 215 acl = gfs2_get_acl(inode, type);
220 if (IS_ERR(acl)) 216 if (IS_ERR(acl))
221 return PTR_ERR(acl); 217 return PTR_ERR(acl);
222 if (acl == NULL) 218 if (acl == NULL)
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4858e1fed8b1..501e5cba09b3 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -615,7 +615,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
615 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 615 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
616 int alloc_required; 616 int alloc_required;
617 int error = 0; 617 int error = 0;
618 struct gfs2_alloc *al = NULL; 618 struct gfs2_qadata *qa = NULL;
619 pgoff_t index = pos >> PAGE_CACHE_SHIFT; 619 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
620 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 620 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
621 struct page *page; 621 struct page *page;
@@ -639,8 +639,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
639 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks); 639 gfs2_write_calc_reserv(ip, len, &data_blocks, &ind_blocks);
640 640
641 if (alloc_required) { 641 if (alloc_required) {
642 al = gfs2_alloc_get(ip); 642 qa = gfs2_qadata_get(ip);
643 if (!al) { 643 if (!qa) {
644 error = -ENOMEM; 644 error = -ENOMEM;
645 goto out_unlock; 645 goto out_unlock;
646 } 646 }
@@ -649,8 +649,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
649 if (error) 649 if (error)
650 goto out_alloc_put; 650 goto out_alloc_put;
651 651
652 al->al_requested = data_blocks + ind_blocks; 652 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
653 error = gfs2_inplace_reserve(ip);
654 if (error) 653 if (error)
655 goto out_qunlock; 654 goto out_qunlock;
656 } 655 }
@@ -711,7 +710,7 @@ out_trans_fail:
711out_qunlock: 710out_qunlock:
712 gfs2_quota_unlock(ip); 711 gfs2_quota_unlock(ip);
713out_alloc_put: 712out_alloc_put:
714 gfs2_alloc_put(ip); 713 gfs2_qadata_put(ip);
715 } 714 }
716out_unlock: 715out_unlock:
717 if (&ip->i_inode == sdp->sd_rindex) { 716 if (&ip->i_inode == sdp->sd_rindex) {
@@ -848,7 +847,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
848 struct gfs2_sbd *sdp = GFS2_SB(inode); 847 struct gfs2_sbd *sdp = GFS2_SB(inode);
849 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); 848 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
850 struct buffer_head *dibh; 849 struct buffer_head *dibh;
851 struct gfs2_alloc *al = ip->i_alloc; 850 struct gfs2_qadata *qa = ip->i_qadata;
852 unsigned int from = pos & (PAGE_CACHE_SIZE - 1); 851 unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
853 unsigned int to = from + len; 852 unsigned int to = from + len;
854 int ret; 853 int ret;
@@ -880,10 +879,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
880 brelse(dibh); 879 brelse(dibh);
881failed: 880failed:
882 gfs2_trans_end(sdp); 881 gfs2_trans_end(sdp);
883 if (al) { 882 if (ip->i_res)
884 gfs2_inplace_release(ip); 883 gfs2_inplace_release(ip);
884 if (qa) {
885 gfs2_quota_unlock(ip); 885 gfs2_quota_unlock(ip);
886 gfs2_alloc_put(ip); 886 gfs2_qadata_put(ip);
887 } 887 }
888 if (inode == sdp->sd_rindex) { 888 if (inode == sdp->sd_rindex) {
889 gfs2_glock_dq(&m_ip->i_gh); 889 gfs2_glock_dq(&m_ip->i_gh);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 41d494d79709..14a704015970 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -133,7 +133,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
133 and write it out to disk */ 133 and write it out to disk */
134 134
135 unsigned int n = 1; 135 unsigned int n = 1;
136 error = gfs2_alloc_block(ip, &block, &n); 136 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
137 if (error) 137 if (error)
138 goto out_brelse; 138 goto out_brelse;
139 if (isdir) { 139 if (isdir) {
@@ -503,7 +503,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
503 do { 503 do {
504 int error; 504 int error;
505 n = blks - alloced; 505 n = blks - alloced;
506 error = gfs2_alloc_block(ip, &bn, &n); 506 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
507 if (error) 507 if (error)
508 return error; 508 return error;
509 alloced += n; 509 alloced += n;
@@ -743,9 +743,6 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
743 else if (ip->i_depth) 743 else if (ip->i_depth)
744 revokes = sdp->sd_inptrs; 744 revokes = sdp->sd_inptrs;
745 745
746 if (error)
747 return error;
748
749 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); 746 memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
750 bstart = 0; 747 bstart = 0;
751 blen = 0; 748 blen = 0;
@@ -1044,7 +1041,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1044 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift; 1041 lblock = (size - 1) >> sdp->sd_sb.sb_bsize_shift;
1045 1042
1046 find_metapath(sdp, lblock, &mp, ip->i_height); 1043 find_metapath(sdp, lblock, &mp, ip->i_height);
1047 if (!gfs2_alloc_get(ip)) 1044 if (!gfs2_qadata_get(ip))
1048 return -ENOMEM; 1045 return -ENOMEM;
1049 1046
1050 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1047 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1064,7 +1061,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
1064 gfs2_quota_unhold(ip); 1061 gfs2_quota_unhold(ip);
1065 1062
1066out: 1063out:
1067 gfs2_alloc_put(ip); 1064 gfs2_qadata_put(ip);
1068 return error; 1065 return error;
1069} 1066}
1070 1067
@@ -1166,21 +1163,20 @@ static int do_grow(struct inode *inode, u64 size)
1166 struct gfs2_inode *ip = GFS2_I(inode); 1163 struct gfs2_inode *ip = GFS2_I(inode);
1167 struct gfs2_sbd *sdp = GFS2_SB(inode); 1164 struct gfs2_sbd *sdp = GFS2_SB(inode);
1168 struct buffer_head *dibh; 1165 struct buffer_head *dibh;
1169 struct gfs2_alloc *al = NULL; 1166 struct gfs2_qadata *qa = NULL;
1170 int error; 1167 int error;
1171 1168
1172 if (gfs2_is_stuffed(ip) && 1169 if (gfs2_is_stuffed(ip) &&
1173 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { 1170 (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) {
1174 al = gfs2_alloc_get(ip); 1171 qa = gfs2_qadata_get(ip);
1175 if (al == NULL) 1172 if (qa == NULL)
1176 return -ENOMEM; 1173 return -ENOMEM;
1177 1174
1178 error = gfs2_quota_lock_check(ip); 1175 error = gfs2_quota_lock_check(ip);
1179 if (error) 1176 if (error)
1180 goto do_grow_alloc_put; 1177 goto do_grow_alloc_put;
1181 1178
1182 al->al_requested = 1; 1179 error = gfs2_inplace_reserve(ip, 1);
1183 error = gfs2_inplace_reserve(ip);
1184 if (error) 1180 if (error)
1185 goto do_grow_qunlock; 1181 goto do_grow_qunlock;
1186 } 1182 }
@@ -1189,7 +1185,7 @@ static int do_grow(struct inode *inode, u64 size)
1189 if (error) 1185 if (error)
1190 goto do_grow_release; 1186 goto do_grow_release;
1191 1187
1192 if (al) { 1188 if (qa) {
1193 error = gfs2_unstuff_dinode(ip, NULL); 1189 error = gfs2_unstuff_dinode(ip, NULL);
1194 if (error) 1190 if (error)
1195 goto do_end_trans; 1191 goto do_end_trans;
@@ -1208,12 +1204,12 @@ static int do_grow(struct inode *inode, u64 size)
1208do_end_trans: 1204do_end_trans:
1209 gfs2_trans_end(sdp); 1205 gfs2_trans_end(sdp);
1210do_grow_release: 1206do_grow_release:
1211 if (al) { 1207 if (qa) {
1212 gfs2_inplace_release(ip); 1208 gfs2_inplace_release(ip);
1213do_grow_qunlock: 1209do_grow_qunlock:
1214 gfs2_quota_unlock(ip); 1210 gfs2_quota_unlock(ip);
1215do_grow_alloc_put: 1211do_grow_alloc_put:
1216 gfs2_alloc_put(ip); 1212 gfs2_qadata_put(ip);
1217 } 1213 }
1218 return error; 1214 return error;
1219} 1215}
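The gfs2 hunks above replace the old gfs2_alloc_get()/al->al_requested pair with gfs2_qadata_get() plus an explicit block count passed straight to gfs2_inplace_reserve(). Pulled out of do_grow()/gfs2_write_begin(), the reservation sequence now reads roughly as below; this sketch uses the gfs2-internal helpers exactly as they appear in the hunks, so it assumes gfs2's private headers, and the wrapper name is illustrative:

	/* Sketch of the post-patch reservation sequence (error handling trimmed). */
	static int reserve_blocks(struct gfs2_inode *ip, unsigned requested)
	{
		struct gfs2_qadata *qa;
		int error;

		qa = gfs2_qadata_get(ip);		/* was gfs2_alloc_get() */
		if (!qa)
			return -ENOMEM;

		error = gfs2_quota_lock_check(ip);
		if (error)
			goto out_put;

		/* The request size is now an argument, not a field of a struct. */
		error = gfs2_inplace_reserve(ip, requested);
		if (error)
			goto out_qunlock;

		return 0;

	out_qunlock:
		gfs2_quota_unlock(ip);
	out_put:
		gfs2_qadata_put(ip);
		return error;
	}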
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 8ccad2467cb6..c35573abd371 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -76,6 +76,8 @@
76#define IS_LEAF 1 /* Hashed (leaf) directory */ 76#define IS_LEAF 1 /* Hashed (leaf) directory */
77#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ 77#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */
78 78
79#define MAX_RA_BLOCKS 32 /* max read-ahead blocks */
80
79#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) 81#define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1)
80#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) 82#define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1))
81 83
@@ -821,7 +823,7 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh,
821 struct gfs2_dirent *dent; 823 struct gfs2_dirent *dent;
822 struct qstr name = { .name = "", .len = 0, .hash = 0 }; 824 struct qstr name = { .name = "", .len = 0, .hash = 0 };
823 825
824 error = gfs2_alloc_block(ip, &bn, &n); 826 error = gfs2_alloc_blocks(ip, &bn, &n, 0, NULL);
825 if (error) 827 if (error)
826 return NULL; 828 return NULL;
827 bh = gfs2_meta_new(ip->i_gl, bn); 829 bh = gfs2_meta_new(ip->i_gl, bn);
@@ -1376,6 +1378,52 @@ out:
1376 return error; 1378 return error;
1377} 1379}
1378 1380
1381/**
1382 * gfs2_dir_readahead - Issue read-ahead requests for leaf blocks.
1383 *
1384 * Note: we can't calculate each index like dir_e_read can because we don't
1385 * have the leaf, and therefore we don't have the depth, and therefore we
1386 * don't have the length. So we have to just read enough ahead to make up
1387 * for the loss of information.
1388 */
1389static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index,
1390 struct file_ra_state *f_ra)
1391{
1392 struct gfs2_inode *ip = GFS2_I(inode);
1393 struct gfs2_glock *gl = ip->i_gl;
1394 struct buffer_head *bh;
1395 u64 blocknr = 0, last;
1396 unsigned count;
1397
1398 /* First check if we've already read-ahead for the whole range. */
1399 if (index + MAX_RA_BLOCKS < f_ra->start)
1400 return;
1401
1402 f_ra->start = max((pgoff_t)index, f_ra->start);
1403 for (count = 0; count < MAX_RA_BLOCKS; count++) {
1404 if (f_ra->start >= hsize) /* if exceeded the hash table */
1405 break;
1406
1407 last = blocknr;
1408 blocknr = be64_to_cpu(ip->i_hash_cache[f_ra->start]);
1409 f_ra->start++;
1410 if (blocknr == last)
1411 continue;
1412
1413 bh = gfs2_getbuf(gl, blocknr, 1);
1414 if (trylock_buffer(bh)) {
1415 if (buffer_uptodate(bh)) {
1416 unlock_buffer(bh);
1417 brelse(bh);
1418 continue;
1419 }
1420 bh->b_end_io = end_buffer_read_sync;
1421 submit_bh(READA | REQ_META, bh);
1422 continue;
1423 }
1424 brelse(bh);
1425 }
1426}
1379 1427
1380/** 1428/**
1381 * dir_e_read - Reads the entries from a directory into a filldir buffer 1429 * dir_e_read - Reads the entries from a directory into a filldir buffer
@@ -1388,7 +1436,7 @@ out:
1388 */ 1436 */
1389 1437
1390static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, 1438static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1391 filldir_t filldir) 1439 filldir_t filldir, struct file_ra_state *f_ra)
1392{ 1440{
1393 struct gfs2_inode *dip = GFS2_I(inode); 1441 struct gfs2_inode *dip = GFS2_I(inode);
1394 u32 hsize, len = 0; 1442 u32 hsize, len = 0;
@@ -1402,10 +1450,14 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1402 hash = gfs2_dir_offset2hash(*offset); 1450 hash = gfs2_dir_offset2hash(*offset);
1403 index = hash >> (32 - dip->i_depth); 1451 index = hash >> (32 - dip->i_depth);
1404 1452
1453 if (dip->i_hash_cache == NULL)
1454 f_ra->start = 0;
1405 lp = gfs2_dir_get_hash_table(dip); 1455 lp = gfs2_dir_get_hash_table(dip);
1406 if (IS_ERR(lp)) 1456 if (IS_ERR(lp))
1407 return PTR_ERR(lp); 1457 return PTR_ERR(lp);
1408 1458
1459 gfs2_dir_readahead(inode, hsize, index, f_ra);
1460
1409 while (index < hsize) { 1461 while (index < hsize) {
1410 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, 1462 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1411 &copied, &depth, 1463 &copied, &depth,
@@ -1423,7 +1475,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1423} 1475}
1424 1476
1425int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 1477int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1426 filldir_t filldir) 1478 filldir_t filldir, struct file_ra_state *f_ra)
1427{ 1479{
1428 struct gfs2_inode *dip = GFS2_I(inode); 1480 struct gfs2_inode *dip = GFS2_I(inode);
1429 struct gfs2_sbd *sdp = GFS2_SB(inode); 1481 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1437,7 +1489,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
1437 return 0; 1489 return 0;
1438 1490
1439 if (dip->i_diskflags & GFS2_DIF_EXHASH) 1491 if (dip->i_diskflags & GFS2_DIF_EXHASH)
1440 return dir_e_read(inode, offset, opaque, filldir); 1492 return dir_e_read(inode, offset, opaque, filldir, f_ra);
1441 1493
1442 if (!gfs2_is_stuffed(dip)) { 1494 if (!gfs2_is_stuffed(dip)) {
1443 gfs2_consist_inode(dip); 1495 gfs2_consist_inode(dip);
@@ -1798,7 +1850,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
1798 if (!ht) 1850 if (!ht)
1799 return -ENOMEM; 1851 return -ENOMEM;
1800 1852
1801 if (!gfs2_alloc_get(dip)) { 1853 if (!gfs2_qadata_get(dip)) {
1802 error = -ENOMEM; 1854 error = -ENOMEM;
1803 goto out; 1855 goto out;
1804 } 1856 }
@@ -1887,7 +1939,7 @@ out_rlist:
1887 gfs2_rlist_free(&rlist); 1939 gfs2_rlist_free(&rlist);
1888 gfs2_quota_unhold(dip); 1940 gfs2_quota_unhold(dip);
1889out_put: 1941out_put:
1890 gfs2_alloc_put(dip); 1942 gfs2_qadata_put(dip);
1891out: 1943out:
1892 kfree(ht); 1944 kfree(ht);
1893 return error; 1945 return error;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index ff5772fbf024..98c960beab35 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -25,7 +25,7 @@ extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
25 const struct gfs2_inode *ip); 25 const struct gfs2_inode *ip);
26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); 26extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry);
27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, 27extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
28 filldir_t filldir); 28 filldir_t filldir, struct file_ra_state *f_ra);
29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, 29extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
30 const struct gfs2_inode *nip, unsigned int new_type); 30 const struct gfs2_inode *nip, unsigned int new_type);
31 31
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index fe9945f2ff72..70ba891654f8 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -99,6 +99,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
99 struct gfs2_holder gh; 99 struct gfs2_holder gh;
100 u64 offset = 0; 100 u64 offset = 0;
101 int error; 101 int error;
102 struct file_ra_state f_ra = { .start = 0 };
102 103
103 if (!dir) 104 if (!dir)
104 return -EINVAL; 105 return -EINVAL;
@@ -118,7 +119,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
118 if (error) 119 if (error)
119 return error; 120 return error;
120 121
121 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir); 122 error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir, &f_ra);
122 123
123 gfs2_glock_dq_uninit(&gh); 124 gfs2_glock_dq_uninit(&gh);
124 125
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ce36a56dfeac..c5fb3597f696 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -105,7 +105,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
105 return error; 105 return error;
106 } 106 }
107 107
108 error = gfs2_dir_read(dir, &offset, dirent, filldir); 108 error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra);
109 109
110 gfs2_glock_dq_uninit(&d_gh); 110 gfs2_glock_dq_uninit(&d_gh);
111 111
@@ -223,7 +223,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
223 int error; 223 int error;
224 u32 new_flags, flags; 224 u32 new_flags, flags;
225 225
226 error = mnt_want_write(filp->f_path.mnt); 226 error = mnt_want_write_file(filp);
227 if (error) 227 if (error)
228 return error; 228 return error;
229 229
@@ -285,7 +285,7 @@ out_trans_end:
285out: 285out:
286 gfs2_glock_dq_uninit(&gh); 286 gfs2_glock_dq_uninit(&gh);
287out_drop_write: 287out_drop_write:
288 mnt_drop_write(filp->f_path.mnt); 288 mnt_drop_write_file(filp);
289 return error; 289 return error;
290} 290}
291 291
@@ -365,7 +365,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
365 u64 pos = page->index << PAGE_CACHE_SHIFT; 365 u64 pos = page->index << PAGE_CACHE_SHIFT;
366 unsigned int data_blocks, ind_blocks, rblocks; 366 unsigned int data_blocks, ind_blocks, rblocks;
367 struct gfs2_holder gh; 367 struct gfs2_holder gh;
368 struct gfs2_alloc *al; 368 struct gfs2_qadata *qa;
369 loff_t size; 369 loff_t size;
370 int ret; 370 int ret;
371 371
@@ -393,16 +393,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
393 } 393 }
394 394
395 ret = -ENOMEM; 395 ret = -ENOMEM;
396 al = gfs2_alloc_get(ip); 396 qa = gfs2_qadata_get(ip);
397 if (al == NULL) 397 if (qa == NULL)
398 goto out_unlock; 398 goto out_unlock;
399 399
400 ret = gfs2_quota_lock_check(ip); 400 ret = gfs2_quota_lock_check(ip);
401 if (ret) 401 if (ret)
402 goto out_alloc_put; 402 goto out_alloc_put;
403 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); 403 gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
404 al->al_requested = data_blocks + ind_blocks; 404 ret = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
405 ret = gfs2_inplace_reserve(ip);
406 if (ret) 405 if (ret)
407 goto out_quota_unlock; 406 goto out_quota_unlock;
408 407
@@ -448,7 +447,7 @@ out_trans_fail:
448out_quota_unlock: 447out_quota_unlock:
449 gfs2_quota_unlock(ip); 448 gfs2_quota_unlock(ip);
450out_alloc_put: 449out_alloc_put:
451 gfs2_alloc_put(ip); 450 gfs2_qadata_put(ip);
452out_unlock: 451out_unlock:
453 gfs2_glock_dq(&gh); 452 gfs2_glock_dq(&gh);
454out: 453out:
@@ -609,7 +608,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
609 struct inode *inode = mapping->host; 608 struct inode *inode = mapping->host;
610 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 609 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
611 struct gfs2_inode *ip = GFS2_I(inode); 610 struct gfs2_inode *ip = GFS2_I(inode);
612 int ret, ret1 = 0; 611 int ret = 0, ret1 = 0;
613 612
614 if (mapping->nrpages) { 613 if (mapping->nrpages) {
615 ret1 = filemap_fdatawrite_range(mapping, start, end); 614 ret1 = filemap_fdatawrite_range(mapping, start, end);
@@ -750,8 +749,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
750 struct gfs2_inode *ip = GFS2_I(inode); 749 struct gfs2_inode *ip = GFS2_I(inode);
751 unsigned int data_blocks = 0, ind_blocks = 0, rblocks; 750 unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
752 loff_t bytes, max_bytes; 751 loff_t bytes, max_bytes;
753 struct gfs2_alloc *al; 752 struct gfs2_qadata *qa;
754 int error; 753 int error;
754 const loff_t pos = offset;
755 const loff_t count = len;
755 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); 756 loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
756 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; 757 loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
757 loff_t max_chunk_size = UINT_MAX & bsize_mask; 758 loff_t max_chunk_size = UINT_MAX & bsize_mask;
@@ -782,8 +783,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
782 while (len > 0) { 783 while (len > 0) {
783 if (len < bytes) 784 if (len < bytes)
784 bytes = len; 785 bytes = len;
785 al = gfs2_alloc_get(ip); 786 qa = gfs2_qadata_get(ip);
786 if (!al) { 787 if (!qa) {
787 error = -ENOMEM; 788 error = -ENOMEM;
788 goto out_unlock; 789 goto out_unlock;
789 } 790 }
@@ -795,8 +796,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
795retry: 796retry:
796 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); 797 gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
797 798
798 al->al_requested = data_blocks + ind_blocks; 799 error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks);
799 error = gfs2_inplace_reserve(ip);
800 if (error) { 800 if (error) {
801 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { 801 if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
802 bytes >>= 1; 802 bytes >>= 1;
@@ -810,7 +810,6 @@ retry:
810 max_bytes = bytes; 810 max_bytes = bytes;
811 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len, 811 calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
812 &max_bytes, &data_blocks, &ind_blocks); 812 &max_bytes, &data_blocks, &ind_blocks);
813 al->al_requested = data_blocks + ind_blocks;
814 813
815 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + 814 rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
816 RES_RG_HDR + gfs2_rg_blocks(ip); 815 RES_RG_HDR + gfs2_rg_blocks(ip);
@@ -832,8 +831,11 @@ retry:
832 offset += max_bytes; 831 offset += max_bytes;
833 gfs2_inplace_release(ip); 832 gfs2_inplace_release(ip);
834 gfs2_quota_unlock(ip); 833 gfs2_quota_unlock(ip);
835 gfs2_alloc_put(ip); 834 gfs2_qadata_put(ip);
836 } 835 }
836
837 if (error == 0)
838 error = generic_write_sync(file, pos, count);
837 goto out_unlock; 839 goto out_unlock;
838 840
839out_trans_fail: 841out_trans_fail:
@@ -841,7 +843,7 @@ out_trans_fail:
841out_qunlock: 843out_qunlock:
842 gfs2_quota_unlock(ip); 844 gfs2_quota_unlock(ip);
843out_alloc_put: 845out_alloc_put:
844 gfs2_alloc_put(ip); 846 gfs2_qadata_put(ip);
845out_unlock: 847out_unlock:
846 gfs2_glock_dq(&ip->i_gh); 848 gfs2_glock_dq(&ip->i_gh);
847out_uninit: 849out_uninit:
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 7389dfdcc9ef..e1d3bb59945c 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -244,17 +244,16 @@ struct gfs2_glock {
244 244
245#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 245#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
246 246
247struct gfs2_alloc { 247struct gfs2_qadata { /* quota allocation data */
248 /* Quota stuff */ 248 /* Quota stuff */
249 struct gfs2_quota_data *al_qd[2*MAXQUOTAS]; 249 struct gfs2_quota_data *qa_qd[2*MAXQUOTAS];
250 struct gfs2_holder al_qd_ghs[2*MAXQUOTAS]; 250 struct gfs2_holder qa_qd_ghs[2*MAXQUOTAS];
251 unsigned int al_qd_num; 251 unsigned int qa_qd_num;
252 252};
253 u32 al_requested; /* Filled in by caller of gfs2_inplace_reserve() */
254 u32 al_alloced; /* Filled in by gfs2_alloc_*() */
255 253
256 /* Filled in by gfs2_inplace_reserve() */ 254struct gfs2_blkreserv {
257 struct gfs2_holder al_rgd_gh; 255 u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */
256 struct gfs2_holder rs_rgd_gh; /* Filled in by gfs2_inplace_reserve() */
258}; 257};
259 258
260enum { 259enum {
@@ -275,7 +274,8 @@ struct gfs2_inode {
275 struct gfs2_glock *i_gl; /* Move into i_gh? */ 274 struct gfs2_glock *i_gl; /* Move into i_gh? */
276 struct gfs2_holder i_iopen_gh; 275 struct gfs2_holder i_iopen_gh;
277 struct gfs2_holder i_gh; /* for prepare/commit_write only */ 276 struct gfs2_holder i_gh; /* for prepare/commit_write only */
278 struct gfs2_alloc *i_alloc; 277 struct gfs2_qadata *i_qadata; /* quota allocation data */
278 struct gfs2_blkreserv *i_res; /* resource group block reservation */
279 struct gfs2_rgrpd *i_rgd; 279 struct gfs2_rgrpd *i_rgd;
280 u64 i_goal; /* goal block for allocations */ 280 u64 i_goal; /* goal block for allocations */
281 struct rw_semaphore i_rw_mutex; 281 struct rw_semaphore i_rw_mutex;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index cfd4959b218c..017960cf1d7a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -333,7 +333,7 @@ out:
333 */ 333 */
334 334
335static int create_ok(struct gfs2_inode *dip, const struct qstr *name, 335static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
336 unsigned int mode) 336 umode_t mode)
337{ 337{
338 int error; 338 int error;
339 339
@@ -364,7 +364,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
364 return 0; 364 return 0;
365} 365}
366 366
367static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, 367static void munge_mode_uid_gid(struct gfs2_inode *dip, umode_t *mode,
368 unsigned int *uid, unsigned int *gid) 368 unsigned int *uid, unsigned int *gid)
369{ 369{
370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir && 370 if (GFS2_SB(&dip->i_inode)->sd_args.ar_suiddir &&
@@ -389,12 +389,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
389{ 389{
390 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 390 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
391 int error; 391 int error;
392 int dblocks = 1;
392 393
393 if (gfs2_alloc_get(dip) == NULL) 394 error = gfs2_rindex_update(sdp);
394 return -ENOMEM; 395 if (error)
396 fs_warn(sdp, "rindex update returns %d\n", error);
395 397
396 dip->i_alloc->al_requested = RES_DINODE; 398 error = gfs2_inplace_reserve(dip, RES_DINODE);
397 error = gfs2_inplace_reserve(dip);
398 if (error) 399 if (error)
399 goto out; 400 goto out;
400 401
@@ -402,14 +403,13 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
402 if (error) 403 if (error)
403 goto out_ipreserv; 404 goto out_ipreserv;
404 405
405 error = gfs2_alloc_di(dip, no_addr, generation); 406 error = gfs2_alloc_blocks(dip, no_addr, &dblocks, 1, generation);
406 407
407 gfs2_trans_end(sdp); 408 gfs2_trans_end(sdp);
408 409
409out_ipreserv: 410out_ipreserv:
410 gfs2_inplace_release(dip); 411 gfs2_inplace_release(dip);
411out: 412out:
412 gfs2_alloc_put(dip);
413 return error; 413 return error;
414} 414}
415 415
@@ -447,7 +447,7 @@ static void gfs2_init_dir(struct buffer_head *dibh,
447 */ 447 */
448 448
449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 449static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
450 const struct gfs2_inum_host *inum, unsigned int mode, 450 const struct gfs2_inum_host *inum, umode_t mode,
451 unsigned int uid, unsigned int gid, 451 unsigned int uid, unsigned int gid,
452 const u64 *generation, dev_t dev, const char *symname, 452 const u64 *generation, dev_t dev, const char *symname,
453 unsigned size, struct buffer_head **bhp) 453 unsigned size, struct buffer_head **bhp)
@@ -516,7 +516,7 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
516} 516}
517 517
518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, 518static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
519 unsigned int mode, const struct gfs2_inum_host *inum, 519 umode_t mode, const struct gfs2_inum_host *inum,
520 const u64 *generation, dev_t dev, const char *symname, 520 const u64 *generation, dev_t dev, const char *symname,
521 unsigned int size, struct buffer_head **bhp) 521 unsigned int size, struct buffer_head **bhp)
522{ 522{
@@ -525,7 +525,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
525 int error; 525 int error;
526 526
527 munge_mode_uid_gid(dip, &mode, &uid, &gid); 527 munge_mode_uid_gid(dip, &mode, &uid, &gid);
528 if (!gfs2_alloc_get(dip)) 528 if (!gfs2_qadata_get(dip))
529 return -ENOMEM; 529 return -ENOMEM;
530 530
531 error = gfs2_quota_lock(dip, uid, gid); 531 error = gfs2_quota_lock(dip, uid, gid);
@@ -547,7 +547,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
547out_quota: 547out_quota:
548 gfs2_quota_unlock(dip); 548 gfs2_quota_unlock(dip);
549out: 549out:
550 gfs2_alloc_put(dip); 550 gfs2_qadata_put(dip);
551 return error; 551 return error;
552} 552}
553 553
@@ -555,13 +555,13 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
555 struct gfs2_inode *ip) 555 struct gfs2_inode *ip)
556{ 556{
557 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 557 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
558 struct gfs2_alloc *al; 558 struct gfs2_qadata *qa;
559 int alloc_required; 559 int alloc_required;
560 struct buffer_head *dibh; 560 struct buffer_head *dibh;
561 int error; 561 int error;
562 562
563 al = gfs2_alloc_get(dip); 563 qa = gfs2_qadata_get(dip);
564 if (!al) 564 if (!qa)
565 return -ENOMEM; 565 return -ENOMEM;
566 566
567 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 567 error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -576,9 +576,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
576 if (error) 576 if (error)
577 goto fail_quota_locks; 577 goto fail_quota_locks;
578 578
579 al->al_requested = sdp->sd_max_dirres; 579 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
580
581 error = gfs2_inplace_reserve(dip);
582 if (error) 580 if (error)
583 goto fail_quota_locks; 581 goto fail_quota_locks;
584 582
@@ -619,11 +617,11 @@ fail_quota_locks:
619 gfs2_quota_unlock(dip); 617 gfs2_quota_unlock(dip);
620 618
621fail: 619fail:
622 gfs2_alloc_put(dip); 620 gfs2_qadata_put(dip);
623 return error; 621 return error;
624} 622}
625 623
626int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, 624static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
627 void *fs_info) 625 void *fs_info)
628{ 626{
629 const struct xattr *xattr; 627 const struct xattr *xattr;
@@ -659,7 +657,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
659 */ 657 */
660 658
661static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, 659static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
662 unsigned int mode, dev_t dev, const char *symname, 660 umode_t mode, dev_t dev, const char *symname,
663 unsigned int size, int excl) 661 unsigned int size, int excl)
664{ 662{
665 const struct qstr *name = &dentry->d_name; 663 const struct qstr *name = &dentry->d_name;
@@ -728,9 +726,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
728 brelse(bh); 726 brelse(bh);
729 727
730 gfs2_trans_end(sdp); 728 gfs2_trans_end(sdp);
731 gfs2_inplace_release(dip); 729 /* Check if we reserved space in the rgrp. Function link_dinode may
730 not, depending on whether alloc is required. */
731 if (dip->i_res)
732 gfs2_inplace_release(dip);
732 gfs2_quota_unlock(dip); 733 gfs2_quota_unlock(dip);
733 gfs2_alloc_put(dip); 734 gfs2_qadata_put(dip);
734 mark_inode_dirty(inode); 735 mark_inode_dirty(inode);
735 gfs2_glock_dq_uninit_m(2, ghs); 736 gfs2_glock_dq_uninit_m(2, ghs);
736 d_instantiate(dentry, inode); 737 d_instantiate(dentry, inode);
@@ -760,7 +761,7 @@ fail:
760 */ 761 */
761 762
762static int gfs2_create(struct inode *dir, struct dentry *dentry, 763static int gfs2_create(struct inode *dir, struct dentry *dentry,
763 int mode, struct nameidata *nd) 764 umode_t mode, struct nameidata *nd)
764{ 765{
765 int excl = 0; 766 int excl = 0;
766 if (nd && (nd->flags & LOOKUP_EXCL)) 767 if (nd && (nd->flags & LOOKUP_EXCL))
@@ -875,8 +876,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
875 error = 0; 876 error = 0;
876 877
877 if (alloc_required) { 878 if (alloc_required) {
878 struct gfs2_alloc *al = gfs2_alloc_get(dip); 879 struct gfs2_qadata *qa = gfs2_qadata_get(dip);
879 if (!al) { 880
881 if (!qa) {
880 error = -ENOMEM; 882 error = -ENOMEM;
881 goto out_gunlock; 883 goto out_gunlock;
882 } 884 }
@@ -885,9 +887,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
885 if (error) 887 if (error)
886 goto out_alloc; 888 goto out_alloc;
887 889
888 al->al_requested = sdp->sd_max_dirres; 890 error = gfs2_inplace_reserve(dip, sdp->sd_max_dirres);
889
890 error = gfs2_inplace_reserve(dip);
891 if (error) 891 if (error)
892 goto out_gunlock_q; 892 goto out_gunlock_q;
893 893
@@ -930,7 +930,7 @@ out_gunlock_q:
930 gfs2_quota_unlock(dip); 930 gfs2_quota_unlock(dip);
931out_alloc: 931out_alloc:
932 if (alloc_required) 932 if (alloc_required)
933 gfs2_alloc_put(dip); 933 gfs2_qadata_put(dip);
934out_gunlock: 934out_gunlock:
935 gfs2_glock_dq(ghs + 1); 935 gfs2_glock_dq(ghs + 1);
936out_child: 936out_child:
@@ -1037,12 +1037,14 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
1037 struct buffer_head *bh; 1037 struct buffer_head *bh;
1038 struct gfs2_holder ghs[3]; 1038 struct gfs2_holder ghs[3];
1039 struct gfs2_rgrpd *rgd; 1039 struct gfs2_rgrpd *rgd;
1040 int error; 1040 int error = -EROFS;
1041 1041
1042 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); 1042 gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
1043 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); 1043 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
1044 1044
1045 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); 1045 rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
1046 if (!rgd)
1047 goto out_inodes;
1046 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); 1048 gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
1047 1049
1048 1050
@@ -1088,12 +1090,13 @@ out_end_trans:
1088out_gunlock: 1090out_gunlock:
1089 gfs2_glock_dq(ghs + 2); 1091 gfs2_glock_dq(ghs + 2);
1090out_rgrp: 1092out_rgrp:
1091 gfs2_holder_uninit(ghs + 2);
1092 gfs2_glock_dq(ghs + 1); 1093 gfs2_glock_dq(ghs + 1);
1093out_child: 1094out_child:
1094 gfs2_holder_uninit(ghs + 1);
1095 gfs2_glock_dq(ghs); 1095 gfs2_glock_dq(ghs);
1096out_parent: 1096out_parent:
1097 gfs2_holder_uninit(ghs + 2);
1098out_inodes:
1099 gfs2_holder_uninit(ghs + 1);
1097 gfs2_holder_uninit(ghs); 1100 gfs2_holder_uninit(ghs);
1098 return error; 1101 return error;
1099} 1102}
@@ -1129,7 +1132,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
1129 * Returns: errno 1132 * Returns: errno
1130 */ 1133 */
1131 1134
1132static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1135static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1133{ 1136{
1134 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0); 1137 return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
1135} 1138}
@@ -1143,7 +1146,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1143 * 1146 *
1144 */ 1147 */
1145 1148
1146static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, 1149static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
1147 dev_t dev) 1150 dev_t dev)
1148{ 1151{
1149 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); 1152 return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0);
@@ -1350,8 +1353,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1350 error = 0; 1353 error = 0;
1351 1354
1352 if (alloc_required) { 1355 if (alloc_required) {
1353 struct gfs2_alloc *al = gfs2_alloc_get(ndip); 1356 struct gfs2_qadata *qa = gfs2_qadata_get(ndip);
1354 if (!al) { 1357
1358 if (!qa) {
1355 error = -ENOMEM; 1359 error = -ENOMEM;
1356 goto out_gunlock; 1360 goto out_gunlock;
1357 } 1361 }
@@ -1360,9 +1364,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1360 if (error) 1364 if (error)
1361 goto out_alloc; 1365 goto out_alloc;
1362 1366
1363 al->al_requested = sdp->sd_max_dirres; 1367 error = gfs2_inplace_reserve(ndip, sdp->sd_max_dirres);
1364
1365 error = gfs2_inplace_reserve(ndip);
1366 if (error) 1368 if (error)
1367 goto out_gunlock_q; 1369 goto out_gunlock_q;
1368 1370
@@ -1423,7 +1425,7 @@ out_gunlock_q:
1423 gfs2_quota_unlock(ndip); 1425 gfs2_quota_unlock(ndip);
1424out_alloc: 1426out_alloc:
1425 if (alloc_required) 1427 if (alloc_required)
1426 gfs2_alloc_put(ndip); 1428 gfs2_qadata_put(ndip);
1427out_gunlock: 1429out_gunlock:
1428 while (x--) { 1430 while (x--) {
1429 gfs2_glock_dq(ghs + x); 1431 gfs2_glock_dq(ghs + x);
@@ -1584,7 +1586,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
1584 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) 1586 if (!(attr->ia_valid & ATTR_GID) || ogid == ngid)
1585 ogid = ngid = NO_QUOTA_CHANGE; 1587 ogid = ngid = NO_QUOTA_CHANGE;
1586 1588
1587 if (!gfs2_alloc_get(ip)) 1589 if (!gfs2_qadata_get(ip))
1588 return -ENOMEM; 1590 return -ENOMEM;
1589 1591
1590 error = gfs2_quota_lock(ip, nuid, ngid); 1592 error = gfs2_quota_lock(ip, nuid, ngid);
@@ -1616,7 +1618,7 @@ out_end_trans:
1616out_gunlock_q: 1618out_gunlock_q:
1617 gfs2_quota_unlock(ip); 1619 gfs2_quota_unlock(ip);
1618out_alloc: 1620out_alloc:
1619 gfs2_alloc_put(ip); 1621 gfs2_qadata_put(ip);
1620 return error; 1622 return error;
1621} 1623}
1622 1624
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 598646434362..756fae9eaf8f 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -626,7 +626,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) 626 if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); 627 submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
628 else 628 else
629 submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); 629 submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
630 wait_on_buffer(bh); 630 wait_on_buffer(bh);
631 631
632 if (!buffer_uptodate(bh)) 632 if (!buffer_uptodate(bh))
@@ -951,8 +951,8 @@ int gfs2_logd(void *data)
951 wake_up(&sdp->sd_log_waitq); 951 wake_up(&sdp->sd_log_waitq);
952 952
953 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; 953 t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
954 if (freezing(current)) 954
955 refrigerator(); 955 try_to_freeze();
956 956
957 do { 957 do {
958 prepare_to_wait(&sdp->sd_logd_waitq, &wait, 958 prepare_to_wait(&sdp->sd_logd_waitq, &wait,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 8a139ff1919f..c150298e2d8e 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -40,7 +40,8 @@ static void gfs2_init_inode_once(void *foo)
40 inode_init_once(&ip->i_inode); 40 inode_init_once(&ip->i_inode);
41 init_rwsem(&ip->i_rw_mutex); 41 init_rwsem(&ip->i_rw_mutex);
42 INIT_LIST_HEAD(&ip->i_trunc_list); 42 INIT_LIST_HEAD(&ip->i_trunc_list);
43 ip->i_alloc = NULL; 43 ip->i_qadata = NULL;
44 ip->i_res = NULL;
44 ip->i_hash_cache = NULL; 45 ip->i_hash_cache = NULL;
45} 46}
46 47
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index be29858900f6..181586e673f9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
435 if (buffer_uptodate(first_bh)) 435 if (buffer_uptodate(first_bh))
436 goto out; 436 goto out;
437 if (!buffer_locked(first_bh)) 437 if (!buffer_locked(first_bh))
438 ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); 438 ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
439 439
440 dblock++; 440 dblock++;
441 extlen--; 441 extlen--;
@@ -444,7 +444,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
444 bh = gfs2_getbuf(gl, dblock, CREATE); 444 bh = gfs2_getbuf(gl, dblock, CREATE);
445 445
446 if (!buffer_uptodate(bh) && !buffer_locked(bh)) 446 if (!buffer_uptodate(bh) && !buffer_locked(bh))
447 ll_rw_block(READA, 1, &bh); 447 ll_rw_block(READA | REQ_META, 1, &bh);
448 brelse(bh); 448 brelse(bh);
449 dblock++; 449 dblock++;
450 extlen--; 450 extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cb23c2be731a..fe72e79e6ff9 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
224 224
225 bio->bi_end_io = end_bio_io_page; 225 bio->bi_end_io = end_bio_io_page;
226 bio->bi_private = page; 226 bio->bi_private = page;
227 submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); 227 submit_bio(READ_SYNC | REQ_META, bio);
228 wait_on_page_locked(page); 228 wait_on_page_locked(page);
229 bio_put(bio); 229 bio_put(bio);
230 if (!PageUptodate(page)) { 230 if (!PageUptodate(page)) {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 7e528dc14f85..a45b21b03915 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -494,11 +494,11 @@ static void qdsb_put(struct gfs2_quota_data *qd)
494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid) 494int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
495{ 495{
496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 496 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
497 struct gfs2_alloc *al = ip->i_alloc; 497 struct gfs2_qadata *qa = ip->i_qadata;
498 struct gfs2_quota_data **qd = al->al_qd; 498 struct gfs2_quota_data **qd = qa->qa_qd;
499 int error; 499 int error;
500 500
501 if (gfs2_assert_warn(sdp, !al->al_qd_num) || 501 if (gfs2_assert_warn(sdp, !qa->qa_qd_num) ||
502 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) 502 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
503 return -EIO; 503 return -EIO;
504 504
@@ -508,20 +508,20 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
508 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd); 508 error = qdsb_get(sdp, QUOTA_USER, ip->i_inode.i_uid, qd);
509 if (error) 509 if (error)
510 goto out; 510 goto out;
511 al->al_qd_num++; 511 qa->qa_qd_num++;
512 qd++; 512 qd++;
513 513
514 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd); 514 error = qdsb_get(sdp, QUOTA_GROUP, ip->i_inode.i_gid, qd);
515 if (error) 515 if (error)
516 goto out; 516 goto out;
517 al->al_qd_num++; 517 qa->qa_qd_num++;
518 qd++; 518 qd++;
519 519
520 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) { 520 if (uid != NO_QUOTA_CHANGE && uid != ip->i_inode.i_uid) {
521 error = qdsb_get(sdp, QUOTA_USER, uid, qd); 521 error = qdsb_get(sdp, QUOTA_USER, uid, qd);
522 if (error) 522 if (error)
523 goto out; 523 goto out;
524 al->al_qd_num++; 524 qa->qa_qd_num++;
525 qd++; 525 qd++;
526 } 526 }
527 527
@@ -529,7 +529,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, u32 uid, u32 gid)
529 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd); 529 error = qdsb_get(sdp, QUOTA_GROUP, gid, qd);
530 if (error) 530 if (error)
531 goto out; 531 goto out;
532 al->al_qd_num++; 532 qa->qa_qd_num++;
533 qd++; 533 qd++;
534 } 534 }
535 535
@@ -542,16 +542,16 @@ out:
542void gfs2_quota_unhold(struct gfs2_inode *ip) 542void gfs2_quota_unhold(struct gfs2_inode *ip)
543{ 543{
544 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 544 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
545 struct gfs2_alloc *al = ip->i_alloc; 545 struct gfs2_qadata *qa = ip->i_qadata;
546 unsigned int x; 546 unsigned int x;
547 547
548 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)); 548 gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
549 549
550 for (x = 0; x < al->al_qd_num; x++) { 550 for (x = 0; x < qa->qa_qd_num; x++) {
551 qdsb_put(al->al_qd[x]); 551 qdsb_put(qa->qa_qd[x]);
552 al->al_qd[x] = NULL; 552 qa->qa_qd[x] = NULL;
553 } 553 }
554 al->al_qd_num = 0; 554 qa->qa_qd_num = 0;
555} 555}
556 556
557static int sort_qd(const void *a, const void *b) 557static int sort_qd(const void *a, const void *b)
@@ -712,7 +712,7 @@ get_a_page:
712 set_buffer_uptodate(bh); 712 set_buffer_uptodate(bh);
713 713
714 if (!buffer_uptodate(bh)) { 714 if (!buffer_uptodate(bh)) {
715 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); 715 ll_rw_block(READ | REQ_META, 1, &bh);
716 wait_on_buffer(bh); 716 wait_on_buffer(bh);
717 if (!buffer_uptodate(bh)) 717 if (!buffer_uptodate(bh))
718 goto unlock_out; 718 goto unlock_out;
@@ -762,7 +762,6 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
762 struct gfs2_quota_data *qd; 762 struct gfs2_quota_data *qd;
763 loff_t offset; 763 loff_t offset;
764 unsigned int nalloc = 0, blocks; 764 unsigned int nalloc = 0, blocks;
765 struct gfs2_alloc *al = NULL;
766 int error; 765 int error;
767 766
768 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 767 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
@@ -792,26 +791,19 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
792 nalloc++; 791 nalloc++;
793 } 792 }
794 793
795 al = gfs2_alloc_get(ip);
796 if (!al) {
797 error = -ENOMEM;
798 goto out_gunlock;
799 }
800 /* 794 /*
801 * 1 blk for unstuffing inode if stuffed. We add this extra 795 * 1 blk for unstuffing inode if stuffed. We add this extra
802 * block to the reservation unconditionally. If the inode 796 * block to the reservation unconditionally. If the inode
803 * doesn't need unstuffing, the block will be released to the 797 * doesn't need unstuffing, the block will be released to the
804 * rgrp since it won't be allocated during the transaction 798 * rgrp since it won't be allocated during the transaction
805 */ 799 */
806 al->al_requested = 1;
807 /* +3 in the end for unstuffing block, inode size update block 800 /* +3 in the end for unstuffing block, inode size update block
808 * and another block in case quota straddles page boundary and 801 * and another block in case quota straddles page boundary and
809 * two blocks need to be updated instead of 1 */ 802 * two blocks need to be updated instead of 1 */
810 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; 803 blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3;
811 804
812 if (nalloc) 805 error = gfs2_inplace_reserve(ip, 1 +
813 al->al_requested += nalloc * (data_blocks + ind_blocks); 806 (nalloc * (data_blocks + ind_blocks)));
814 error = gfs2_inplace_reserve(ip);
815 if (error) 807 if (error)
816 goto out_alloc; 808 goto out_alloc;
817 809
@@ -840,8 +832,6 @@ out_end_trans:
840out_ipres: 832out_ipres:
841 gfs2_inplace_release(ip); 833 gfs2_inplace_release(ip);
842out_alloc: 834out_alloc:
843 gfs2_alloc_put(ip);
844out_gunlock:
845 gfs2_glock_dq_uninit(&i_gh); 835 gfs2_glock_dq_uninit(&i_gh);
846out: 836out:
847 while (qx--) 837 while (qx--)
@@ -925,7 +915,7 @@ fail:
925int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) 915int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
926{ 916{
927 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 917 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
928 struct gfs2_alloc *al = ip->i_alloc; 918 struct gfs2_qadata *qa = ip->i_qadata;
929 struct gfs2_quota_data *qd; 919 struct gfs2_quota_data *qd;
930 unsigned int x; 920 unsigned int x;
931 int error = 0; 921 int error = 0;
@@ -938,15 +928,15 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
938 sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 928 sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
939 return 0; 929 return 0;
940 930
941 sort(al->al_qd, al->al_qd_num, sizeof(struct gfs2_quota_data *), 931 sort(qa->qa_qd, qa->qa_qd_num, sizeof(struct gfs2_quota_data *),
942 sort_qd, NULL); 932 sort_qd, NULL);
943 933
944 for (x = 0; x < al->al_qd_num; x++) { 934 for (x = 0; x < qa->qa_qd_num; x++) {
945 int force = NO_FORCE; 935 int force = NO_FORCE;
946 qd = al->al_qd[x]; 936 qd = qa->qa_qd[x];
947 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) 937 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
948 force = FORCE; 938 force = FORCE;
949 error = do_glock(qd, force, &al->al_qd_ghs[x]); 939 error = do_glock(qd, force, &qa->qa_qd_ghs[x]);
950 if (error) 940 if (error)
951 break; 941 break;
952 } 942 }
@@ -955,7 +945,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
955 set_bit(GIF_QD_LOCKED, &ip->i_flags); 945 set_bit(GIF_QD_LOCKED, &ip->i_flags);
956 else { 946 else {
957 while (x--) 947 while (x--)
958 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]); 948 gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
959 gfs2_quota_unhold(ip); 949 gfs2_quota_unhold(ip);
960 } 950 }
961 951
@@ -1000,7 +990,7 @@ static int need_sync(struct gfs2_quota_data *qd)
1000 990
1001void gfs2_quota_unlock(struct gfs2_inode *ip) 991void gfs2_quota_unlock(struct gfs2_inode *ip)
1002{ 992{
1003 struct gfs2_alloc *al = ip->i_alloc; 993 struct gfs2_qadata *qa = ip->i_qadata;
1004 struct gfs2_quota_data *qda[4]; 994 struct gfs2_quota_data *qda[4];
1005 unsigned int count = 0; 995 unsigned int count = 0;
1006 unsigned int x; 996 unsigned int x;
@@ -1008,14 +998,14 @@ void gfs2_quota_unlock(struct gfs2_inode *ip)
1008 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) 998 if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags))
1009 goto out; 999 goto out;
1010 1000
1011 for (x = 0; x < al->al_qd_num; x++) { 1001 for (x = 0; x < qa->qa_qd_num; x++) {
1012 struct gfs2_quota_data *qd; 1002 struct gfs2_quota_data *qd;
1013 int sync; 1003 int sync;
1014 1004
1015 qd = al->al_qd[x]; 1005 qd = qa->qa_qd[x];
1016 sync = need_sync(qd); 1006 sync = need_sync(qd);
1017 1007
1018 gfs2_glock_dq_uninit(&al->al_qd_ghs[x]); 1008 gfs2_glock_dq_uninit(&qa->qa_qd_ghs[x]);
1019 1009
1020 if (sync && qd_trylock(qd)) 1010 if (sync && qd_trylock(qd))
1021 qda[count++] = qd; 1011 qda[count++] = qd;
@@ -1048,7 +1038,7 @@ static int print_message(struct gfs2_quota_data *qd, char *type)
1048int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid) 1038int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1049{ 1039{
1050 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1040 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1051 struct gfs2_alloc *al = ip->i_alloc; 1041 struct gfs2_qadata *qa = ip->i_qadata;
1052 struct gfs2_quota_data *qd; 1042 struct gfs2_quota_data *qd;
1053 s64 value; 1043 s64 value;
1054 unsigned int x; 1044 unsigned int x;
@@ -1060,8 +1050,8 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1060 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) 1050 if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
1061 return 0; 1051 return 0;
1062 1052
1063 for (x = 0; x < al->al_qd_num; x++) { 1053 for (x = 0; x < qa->qa_qd_num; x++) {
1064 qd = al->al_qd[x]; 1054 qd = qa->qa_qd[x];
1065 1055
1066 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1056 if (!((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1067 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags)))) 1057 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))))
@@ -1099,7 +1089,7 @@ int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid)
1099void gfs2_quota_change(struct gfs2_inode *ip, s64 change, 1089void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1100 u32 uid, u32 gid) 1090 u32 uid, u32 gid)
1101{ 1091{
1102 struct gfs2_alloc *al = ip->i_alloc; 1092 struct gfs2_qadata *qa = ip->i_qadata;
1103 struct gfs2_quota_data *qd; 1093 struct gfs2_quota_data *qd;
1104 unsigned int x; 1094 unsigned int x;
1105 1095
@@ -1108,8 +1098,8 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
1108 if (ip->i_diskflags & GFS2_DIF_SYSTEM) 1098 if (ip->i_diskflags & GFS2_DIF_SYSTEM)
1109 return; 1099 return;
1110 1100
1111 for (x = 0; x < al->al_qd_num; x++) { 1101 for (x = 0; x < qa->qa_qd_num; x++) {
1112 qd = al->al_qd[x]; 1102 qd = qa->qa_qd[x];
1113 1103
1114 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) || 1104 if ((qd->qd_id == uid && test_bit(QDF_USER, &qd->qd_flags)) ||
1115 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) { 1105 (qd->qd_id == gid && !test_bit(QDF_USER, &qd->qd_flags))) {
@@ -1427,8 +1417,8 @@ int gfs2_quotad(void *data)
1427 /* Check for & recover partially truncated inodes */ 1417 /* Check for & recover partially truncated inodes */
1428 quotad_check_trunc_list(sdp); 1418 quotad_check_trunc_list(sdp);
1429 1419
1430 if (freezing(current)) 1420 try_to_freeze();
1431 refrigerator(); 1421
1432 t = min(quotad_timeo, statfs_timeo); 1422 t = min(quotad_timeo, statfs_timeo);
1433 1423
1434 prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); 1424 prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
@@ -1529,7 +1519,6 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1529 unsigned int data_blocks, ind_blocks; 1519 unsigned int data_blocks, ind_blocks;
1530 unsigned int blocks = 0; 1520 unsigned int blocks = 0;
1531 int alloc_required; 1521 int alloc_required;
1532 struct gfs2_alloc *al;
1533 loff_t offset; 1522 loff_t offset;
1534 int error; 1523 int error;
1535 1524
@@ -1594,15 +1583,12 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1594 if (gfs2_is_stuffed(ip)) 1583 if (gfs2_is_stuffed(ip))
1595 alloc_required = 1; 1584 alloc_required = 1;
1596 if (alloc_required) { 1585 if (alloc_required) {
1597 al = gfs2_alloc_get(ip);
1598 if (al == NULL)
1599 goto out_i;
1600 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota), 1586 gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
1601 &data_blocks, &ind_blocks); 1587 &data_blocks, &ind_blocks);
1602 blocks = al->al_requested = 1 + data_blocks + ind_blocks; 1588 blocks = 1 + data_blocks + ind_blocks;
1603 error = gfs2_inplace_reserve(ip); 1589 error = gfs2_inplace_reserve(ip, blocks);
1604 if (error) 1590 if (error)
1605 goto out_alloc; 1591 goto out_i;
1606 blocks += gfs2_rg_blocks(ip); 1592 blocks += gfs2_rg_blocks(ip);
1607 } 1593 }
1608 1594
@@ -1617,11 +1603,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1617 1603
1618 gfs2_trans_end(sdp); 1604 gfs2_trans_end(sdp);
1619out_release: 1605out_release:
1620 if (alloc_required) { 1606 if (alloc_required)
1621 gfs2_inplace_release(ip); 1607 gfs2_inplace_release(ip);
1622out_alloc:
1623 gfs2_alloc_put(ip);
1624 }
1625out_i: 1608out_i:
1626 gfs2_glock_dq_uninit(&i_gh); 1609 gfs2_glock_dq_uninit(&i_gh);
1627out_q: 1610out_q:
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 96bd6d759f29..22234627f684 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -65,8 +65,8 @@ static const char valid_change[16] = {
65}; 65};
66 66
67static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 67static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
68 unsigned char old_state, unsigned char new_state, 68 unsigned char old_state,
69 unsigned int *n); 69 struct gfs2_bitmap **rbi);
70 70
71/** 71/**
72 * gfs2_setbit - Set a bit in the bitmaps 72 * gfs2_setbit - Set a bit in the bitmaps
@@ -860,22 +860,36 @@ fail:
860} 860}
861 861
862/** 862/**
863 * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode 863 * gfs2_qadata_get - get the struct gfs2_qadata structure for an inode
864 * @ip: the incore GFS2 inode structure 864 * @ip: the incore GFS2 inode structure
865 * 865 *
866 * Returns: the struct gfs2_alloc 866 * Returns: the struct gfs2_qadata
867 */ 867 */
868 868
869struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) 869struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip)
870{ 870{
871 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 871 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
872 int error; 872 int error;
873 BUG_ON(ip->i_alloc != NULL); 873 BUG_ON(ip->i_qadata != NULL);
874 ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); 874 ip->i_qadata = kzalloc(sizeof(struct gfs2_qadata), GFP_NOFS);
875 error = gfs2_rindex_update(sdp); 875 error = gfs2_rindex_update(sdp);
876 if (error) 876 if (error)
877 fs_warn(sdp, "rindex update returns %d\n", error); 877 fs_warn(sdp, "rindex update returns %d\n", error);
878 return ip->i_alloc; 878 return ip->i_qadata;
879}
880
881/**
882 * gfs2_blkrsv_get - get the struct gfs2_blkreserv structure for an inode
883 * @ip: the incore GFS2 inode structure
884 *
885 * Returns: the struct gfs2_blkreserv
886 */
887
888static struct gfs2_blkreserv *gfs2_blkrsv_get(struct gfs2_inode *ip)
889{
890 BUG_ON(ip->i_res != NULL);
891 ip->i_res = kzalloc(sizeof(struct gfs2_blkreserv), GFP_NOFS);
892 return ip->i_res;
879} 893}
880 894
881/** 895/**
@@ -890,15 +904,20 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
890 904
891static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip) 905static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
892{ 906{
893 const struct gfs2_alloc *al = ip->i_alloc; 907 const struct gfs2_blkreserv *rs = ip->i_res;
894 908
895 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) 909 if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
896 return 0; 910 return 0;
897 if (rgd->rd_free_clone >= al->al_requested) 911 if (rgd->rd_free_clone >= rs->rs_requested)
898 return 1; 912 return 1;
899 return 0; 913 return 0;
900} 914}
901 915
916static inline u32 gfs2_bi2rgd_blk(struct gfs2_bitmap *bi, u32 blk)
917{
918 return (bi->bi_start * GFS2_NBBY) + blk;
919}
920
902/** 921/**
903 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes 922 * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes
904 * @rgd: The rgrp 923 * @rgd: The rgrp
@@ -912,20 +931,20 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
912 u32 goal = 0, block; 931 u32 goal = 0, block;
913 u64 no_addr; 932 u64 no_addr;
914 struct gfs2_sbd *sdp = rgd->rd_sbd; 933 struct gfs2_sbd *sdp = rgd->rd_sbd;
915 unsigned int n;
916 struct gfs2_glock *gl; 934 struct gfs2_glock *gl;
917 struct gfs2_inode *ip; 935 struct gfs2_inode *ip;
918 int error; 936 int error;
919 int found = 0; 937 int found = 0;
938 struct gfs2_bitmap *bi;
920 939
921 while (goal < rgd->rd_data) { 940 while (goal < rgd->rd_data) {
922 down_write(&sdp->sd_log_flush_lock); 941 down_write(&sdp->sd_log_flush_lock);
923 n = 1; 942 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, &bi);
924 block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
925 GFS2_BLKST_UNLINKED, &n);
926 up_write(&sdp->sd_log_flush_lock); 943 up_write(&sdp->sd_log_flush_lock);
927 if (block == BFITNOENT) 944 if (block == BFITNOENT)
928 break; 945 break;
946
947 block = gfs2_bi2rgd_blk(bi, block);
929 /* rgblk_search can return a block < goal, so we need to 948 /* rgblk_search can return a block < goal, so we need to
930 keep it marching forward. */ 949 keep it marching forward. */
931 no_addr = block + rgd->rd_data0; 950 no_addr = block + rgd->rd_data0;
@@ -977,8 +996,8 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
977{ 996{
978 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 997 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
979 struct gfs2_rgrpd *rgd, *begin = NULL; 998 struct gfs2_rgrpd *rgd, *begin = NULL;
980 struct gfs2_alloc *al = ip->i_alloc; 999 struct gfs2_blkreserv *rs = ip->i_res;
981 int error, rg_locked; 1000 int error, rg_locked, flags = LM_FLAG_TRY;
982 int loops = 0; 1001 int loops = 0;
983 1002
984 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) 1003 if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
@@ -997,7 +1016,7 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
997 error = 0; 1016 error = 0;
998 } else { 1017 } else {
999 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 1018 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
1000 LM_FLAG_TRY, &al->al_rgd_gh); 1019 flags, &rs->rs_rgd_gh);
1001 } 1020 }
1002 switch (error) { 1021 switch (error) {
1003 case 0: 1022 case 0:
@@ -1008,12 +1027,14 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1008 if (rgd->rd_flags & GFS2_RDF_CHECK) 1027 if (rgd->rd_flags & GFS2_RDF_CHECK)
1009 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr); 1028 try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
1010 if (!rg_locked) 1029 if (!rg_locked)
1011 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1030 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1012 /* fall through */ 1031 /* fall through */
1013 case GLR_TRYFAILED: 1032 case GLR_TRYFAILED:
1014 rgd = gfs2_rgrpd_get_next(rgd); 1033 rgd = gfs2_rgrpd_get_next(rgd);
1015 if (rgd == begin) 1034 if (rgd == begin) {
1035 flags = 0;
1016 loops++; 1036 loops++;
1037 }
1017 break; 1038 break;
1018 default: 1039 default:
1019 return error; 1040 return error;
@@ -1023,6 +1044,13 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1023 return -ENOSPC; 1044 return -ENOSPC;
1024} 1045}
1025 1046
1047static void gfs2_blkrsv_put(struct gfs2_inode *ip)
1048{
1049 BUG_ON(ip->i_res == NULL);
1050 kfree(ip->i_res);
1051 ip->i_res = NULL;
1052}
1053
1026/** 1054/**
1027 * gfs2_inplace_reserve - Reserve space in the filesystem 1055 * gfs2_inplace_reserve - Reserve space in the filesystem
1028 * @ip: the inode to reserve space for 1056 * @ip: the inode to reserve space for
@@ -1030,16 +1058,23 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
1030 * Returns: errno 1058 * Returns: errno
1031 */ 1059 */
1032 1060
1033int gfs2_inplace_reserve(struct gfs2_inode *ip) 1061int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1034{ 1062{
1035 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1063 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1036 struct gfs2_alloc *al = ip->i_alloc; 1064 struct gfs2_blkreserv *rs;
1037 int error = 0; 1065 int error = 0;
1038 u64 last_unlinked = NO_BLOCK; 1066 u64 last_unlinked = NO_BLOCK;
1039 int tries = 0; 1067 int tries = 0;
1040 1068
1041 if (gfs2_assert_warn(sdp, al->al_requested)) 1069 rs = gfs2_blkrsv_get(ip);
1042 return -EINVAL; 1070 if (!rs)
1071 return -ENOMEM;
1072
1073 rs->rs_requested = requested;
1074 if (gfs2_assert_warn(sdp, requested)) {
1075 error = -EINVAL;
1076 goto out;
1077 }
1043 1078
1044 do { 1079 do {
1045 error = get_local_rgrp(ip, &last_unlinked); 1080 error = get_local_rgrp(ip, &last_unlinked);
@@ -1056,6 +1091,9 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip)
1056 gfs2_log_flush(sdp, NULL); 1091 gfs2_log_flush(sdp, NULL);
1057 } while (tries++ < 3); 1092 } while (tries++ < 3);
1058 1093
1094out:
1095 if (error)
1096 gfs2_blkrsv_put(ip);
1059 return error; 1097 return error;
1060} 1098}
1061 1099
@@ -1068,10 +1106,11 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip)
1068 1106
1069void gfs2_inplace_release(struct gfs2_inode *ip) 1107void gfs2_inplace_release(struct gfs2_inode *ip)
1070{ 1108{
1071 struct gfs2_alloc *al = ip->i_alloc; 1109 struct gfs2_blkreserv *rs = ip->i_res;
1072 1110
1073 if (al->al_rgd_gh.gh_gl) 1111 gfs2_blkrsv_put(ip);
1074 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1112 if (rs->rs_rgd_gh.gh_gl)
1113 gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
1075} 1114}
1076 1115
1077/** 1116/**
@@ -1108,39 +1147,35 @@ static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1108} 1147}
1109 1148
1110/** 1149/**
1111 * rgblk_search - find a block in @old_state, change allocation 1150 * rgblk_search - find a block in @state
1112 * state to @new_state
1113 * @rgd: the resource group descriptor 1151 * @rgd: the resource group descriptor
1114 * @goal: the goal block within the RG (start here to search for avail block) 1152 * @goal: the goal block within the RG (start here to search for avail block)
1115 * @old_state: GFS2_BLKST_XXX the before-allocation state to find 1153 * @state: GFS2_BLKST_XXX the before-allocation state to find
1116 * @new_state: GFS2_BLKST_XXX the after-allocation block state 1154 * @rbi: address of the pointer to the bitmap containing the block found
1117 * @n: The extent length 1155 *
1118 * 1156 *
1119 * Walk rgrp's bitmap to find bits that represent a block in @old_state. 1157 * Walk rgrp's bitmap to find bits that represent a block in @state.
1120 * Add the found bitmap buffer to the transaction.
1121 * Set the found bits to @new_state to change block's allocation state.
1122 * 1158 *
1123 * This function never fails, because we wouldn't call it unless we 1159 * This function never fails, because we wouldn't call it unless we
1124 * know (from reservation results, etc.) that a block is available. 1160 * know (from reservation results, etc.) that a block is available.
1125 * 1161 *
1126 * Scope of @goal and returned block is just within rgrp, not the whole 1162 * Scope of @goal is just within rgrp, not the whole filesystem.
1127 * filesystem. 1163 * Scope of @returned block is just within bitmap, not the whole filesystem.
1128 * 1164 *
1129 * Returns: the block number allocated 1165 * Returns: the block number found relative to the bitmap rbi
1130 */ 1166 */
1131 1167
1132static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, 1168static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
1133 unsigned char old_state, unsigned char new_state, 1169 unsigned char state,
1134 unsigned int *n) 1170 struct gfs2_bitmap **rbi)
1135{ 1171{
1136 struct gfs2_bitmap *bi = NULL; 1172 struct gfs2_bitmap *bi = NULL;
1137 const u32 length = rgd->rd_length; 1173 const u32 length = rgd->rd_length;
1138 u32 blk = BFITNOENT; 1174 u32 blk = BFITNOENT;
1139 unsigned int buf, x; 1175 unsigned int buf, x;
1140 const unsigned int elen = *n;
1141 const u8 *buffer = NULL; 1176 const u8 *buffer = NULL;
1142 1177
1143 *n = 0; 1178 *rbi = NULL;
1144 /* Find bitmap block that contains bits for goal block */ 1179 /* Find bitmap block that contains bits for goal block */
1145 for (buf = 0; buf < length; buf++) { 1180 for (buf = 0; buf < length; buf++) {
1146 bi = rgd->rd_bits + buf; 1181 bi = rgd->rd_bits + buf;
@@ -1163,21 +1198,21 @@ do_search:
1163 bi = rgd->rd_bits + buf; 1198 bi = rgd->rd_bits + buf;
1164 1199
1165 if (test_bit(GBF_FULL, &bi->bi_flags) && 1200 if (test_bit(GBF_FULL, &bi->bi_flags) &&
1166 (old_state == GFS2_BLKST_FREE)) 1201 (state == GFS2_BLKST_FREE))
1167 goto skip; 1202 goto skip;
1168 1203
1169 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone 1204 /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
1170 bitmaps, so we must search the originals for that. */ 1205 bitmaps, so we must search the originals for that. */
1171 buffer = bi->bi_bh->b_data + bi->bi_offset; 1206 buffer = bi->bi_bh->b_data + bi->bi_offset;
1172 WARN_ON(!buffer_uptodate(bi->bi_bh)); 1207 WARN_ON(!buffer_uptodate(bi->bi_bh));
1173 if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) 1208 if (state != GFS2_BLKST_UNLINKED && bi->bi_clone)
1174 buffer = bi->bi_clone + bi->bi_offset; 1209 buffer = bi->bi_clone + bi->bi_offset;
1175 1210
1176 blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state); 1211 blk = gfs2_bitfit(buffer, bi->bi_len, goal, state);
1177 if (blk != BFITNOENT) 1212 if (blk != BFITNOENT)
1178 break; 1213 break;
1179 1214
1180 if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) 1215 if ((goal == 0) && (state == GFS2_BLKST_FREE))
1181 set_bit(GBF_FULL, &bi->bi_flags); 1216 set_bit(GBF_FULL, &bi->bi_flags);
1182 1217
1183 /* Try next bitmap block (wrap back to rgrp header if at end) */ 1218 /* Try next bitmap block (wrap back to rgrp header if at end) */
@@ -1187,16 +1222,37 @@ skip:
1187 goal = 0; 1222 goal = 0;
1188 } 1223 }
1189 1224
1190 if (blk == BFITNOENT) 1225 if (blk != BFITNOENT)
1191 return blk; 1226 *rbi = bi;
1192 1227
1193 *n = 1; 1228 return blk;
1194 if (old_state == new_state) 1229}
1195 goto out; 1230
1231/**
1232 * gfs2_alloc_extent - allocate an extent from a given bitmap
1233 * @rgd: the resource group descriptor
1234 * @bi: the bitmap within the rgrp
1235 * @blk: the block within the bitmap
1236 * @dinode: TRUE if the first block we allocate is for a dinode
1237 * @n: The extent length
1238 *
1239 * Add the found bitmap buffer to the transaction.
1240 * Set the found bits to GFS2_BLKST_USED (or GFS2_BLKST_DINODE for a dinode) to change the block's allocation state.
1241 * Returns: starting block number of the extent (fs scope)
1242 */
1243static u64 gfs2_alloc_extent(struct gfs2_rgrpd *rgd, struct gfs2_bitmap *bi,
1244 u32 blk, bool dinode, unsigned int *n)
1245{
1246 const unsigned int elen = *n;
1247 u32 goal;
1248 const u8 *buffer = NULL;
1196 1249
1250 *n = 0;
1251 buffer = bi->bi_bh->b_data + bi->bi_offset;
1197 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); 1252 gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
1198 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1253 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1199 bi, blk, new_state); 1254 bi, blk, dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
1255 (*n)++;
1200 goal = blk; 1256 goal = blk;
1201 while (*n < elen) { 1257 while (*n < elen) {
1202 goal++; 1258 goal++;
@@ -1206,11 +1262,12 @@ skip:
1206 GFS2_BLKST_FREE) 1262 GFS2_BLKST_FREE)
1207 break; 1263 break;
1208 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset, 1264 gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
1209 bi, goal, new_state); 1265 bi, goal, GFS2_BLKST_USED);
1210 (*n)++; 1266 (*n)++;
1211 } 1267 }
-out:
-        return (bi->bi_start * GFS2_NBBY) + blk;
+        blk = gfs2_bi2rgd_blk(bi, blk);
+        rgd->rd_last_alloc = blk + *n - 1;
+        return rgd->rd_data0 + blk;
 }

 /**
@@ -1298,121 +1355,93 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 }

 /**
- * gfs2_alloc_block - Allocate one or more blocks
+ * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
  * @ip: the inode to allocate the block for
  * @bn: Used to return the starting block number
- * @n: requested number of blocks/extent length (value/result)
+ * @ndata: requested number of blocks/extent length (value/result)
+ * @dinode: 1 if we're allocating a dinode block, else 0
+ * @generation: the generation number of the inode
  *
  * Returns: 0 or error
  */

-int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
+int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
+                      bool dinode, u64 *generation)
 {
         struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
         struct buffer_head *dibh;
-        struct gfs2_alloc *al = ip->i_alloc;
         struct gfs2_rgrpd *rgd;
-        u32 goal, blk;
-        u64 block;
+        unsigned int ndata;
+        u32 goal, blk; /* block, within the rgrp scope */
+        u64 block; /* block, within the file system scope */
         int error;
+        struct gfs2_bitmap *bi;

         /* Only happens if there is a bug in gfs2, return something distinctive
          * to ensure that it is noticed.
          */
-        if (al == NULL)
+        if (ip->i_res == NULL)
                 return -ECANCELED;

         rgd = ip->i_rgd;

-        if (rgrp_contains_block(rgd, ip->i_goal))
+        if (!dinode && rgrp_contains_block(rgd, ip->i_goal))
                 goal = ip->i_goal - rgd->rd_data0;
         else
                 goal = rgd->rd_last_alloc;

-        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
+        blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, &bi);

         /* Since all blocks are reserved in advance, this shouldn't happen */
         if (blk == BFITNOENT)
                 goto rgrp_error;

-        rgd->rd_last_alloc = blk;
-        block = rgd->rd_data0 + blk;
-        ip->i_goal = block + *n - 1;
-        error = gfs2_meta_inode_buffer(ip, &dibh);
-        if (error == 0) {
-                struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
-                gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-                di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
-                brelse(dibh);
+        block = gfs2_alloc_extent(rgd, bi, blk, dinode, nblocks);
+        ndata = *nblocks;
+        if (dinode)
+                ndata--;
+
+        if (!dinode) {
+                ip->i_goal = block + ndata - 1;
+                error = gfs2_meta_inode_buffer(ip, &dibh);
+                if (error == 0) {
+                        struct gfs2_dinode *di =
+                                (struct gfs2_dinode *)dibh->b_data;
+                        gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+                        di->di_goal_meta = di->di_goal_data =
+                                cpu_to_be64(ip->i_goal);
+                        brelse(dibh);
+                }
         }
-        if (rgd->rd_free < *n)
+        if (rgd->rd_free < *nblocks)
                 goto rgrp_error;

-        rgd->rd_free -= *n;
-
-        gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
-        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-
-        al->al_alloced += *n;
-
-        gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
-        gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
-
-        rgd->rd_free_clone -= *n;
-        trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
-        *bn = block;
-        return 0;
-
-rgrp_error:
-        gfs2_rgrp_error(rgd);
-        return -EIO;
-}
-
-/**
- * gfs2_alloc_di - Allocate a dinode
- * @dip: the directory that the inode is going in
- * @bn: the block number which is allocated
- * @generation: the generation number of the inode
- *
- * Returns: 0 on success or error
- */
-
-int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
-{
-        struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-        struct gfs2_alloc *al = dip->i_alloc;
-        struct gfs2_rgrpd *rgd = dip->i_rgd;
-        u32 blk;
-        u64 block;
-        unsigned int n = 1;
-
-        blk = rgblk_search(rgd, rgd->rd_last_alloc,
-                           GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
-
-        /* Since all blocks are reserved in advance, this shouldn't happen */
-        if (blk == BFITNOENT)
-                goto rgrp_error;
-
-        rgd->rd_last_alloc = blk;
-        block = rgd->rd_data0 + blk;
-        if (rgd->rd_free == 0)
-                goto rgrp_error;
-
-        rgd->rd_free--;
-        rgd->rd_dinodes++;
-        *generation = rgd->rd_igeneration++;
-        if (*generation == 0)
-                *generation = rgd->rd_igeneration++;
+        rgd->rd_free -= *nblocks;
+        if (dinode) {
+                rgd->rd_dinodes++;
+                *generation = rgd->rd_igeneration++;
+                if (*generation == 0)
+                        *generation = rgd->rd_igeneration++;
+        }
+
         gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
         gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);

-        al->al_alloced++;
+        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
+        if (dinode)
+                gfs2_trans_add_unrevoke(sdp, block, 1);

-        gfs2_statfs_change(sdp, 0, -1, +1);
-        gfs2_trans_add_unrevoke(sdp, block, 1);
+        /*
+         * This needs reviewing to see why we cannot do the quota change
+         * at this point in the dinode case.
+         */
+        if (ndata)
+                gfs2_quota_change(ip, ndata, ip->i_inode.i_uid,
+                                  ip->i_inode.i_gid);

-        rgd->rd_free_clone--;
-        trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
+        rgd->rd_free_clone -= *nblocks;
+        trace_gfs2_block_alloc(ip, block, *nblocks,
+                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
         *bn = block;
         return 0;

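For illustration, a minimal sketch of how a caller might use the combined allocator after this change; this is not part of the patch, and the local names are illustrative only. A data allocation passes dinode = 0 and no generation pointer (as the xattr.c callers below now do), while a dinode allocation passes dinode = 1 and receives the generation number that gfs2_alloc_di() used to return.

static int example_alloc(struct gfs2_inode *ip, struct gfs2_inode *dip)
{
        u64 dblock, iblock, generation;
        unsigned int ndata = 4;         /* in: blocks wanted, out: extent length */
        unsigned int nino = 1;
        int error;

        /* data extent, what gfs2_alloc_block() used to do */
        error = gfs2_alloc_blocks(ip, &dblock, &ndata, 0, NULL);
        if (error)
                return error;

        /* dinode block, what gfs2_alloc_di() used to do */
        return gfs2_alloc_blocks(dip, &iblock, &nino, 1, &generation);
}
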
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index cf5c50180192..ceec9106cdf4 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -28,19 +28,19 @@ extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
28extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh); 28extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
29extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh); 29extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
30 30
31extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); 31extern struct gfs2_qadata *gfs2_qadata_get(struct gfs2_inode *ip);
32static inline void gfs2_alloc_put(struct gfs2_inode *ip) 32static inline void gfs2_qadata_put(struct gfs2_inode *ip)
33{ 33{
34 BUG_ON(ip->i_alloc == NULL); 34 BUG_ON(ip->i_qadata == NULL);
35 kfree(ip->i_alloc); 35 kfree(ip->i_qadata);
36 ip->i_alloc = NULL; 36 ip->i_qadata = NULL;
37} 37}
38 38
39extern int gfs2_inplace_reserve(struct gfs2_inode *ip); 39extern int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested);
40extern void gfs2_inplace_release(struct gfs2_inode *ip); 40extern void gfs2_inplace_release(struct gfs2_inode *ip);
41 41
42extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 42extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
43extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 43 bool dinode, u64 *generation);
44 44
45extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta); 45extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
46extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 46extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
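The header change above also shows the shape of the new reservation sequence: the request size moves from al->al_requested into the gfs2_inplace_reserve() call, and the quota data is taken and dropped as a gfs2_qadata around the operation. A hedged sketch of that flow, mirroring the updated ea_alloc_skeleton() further down; the function name here is illustrative, not part of the patch.

static int example_reserve(struct gfs2_inode *ip, unsigned int blks)
{
        struct gfs2_qadata *qa;
        int error;

        qa = gfs2_qadata_get(ip);               /* was gfs2_alloc_get() */
        if (!qa)
                return -ENOMEM;

        error = gfs2_quota_lock_check(ip);
        if (error)
                goto out;

        error = gfs2_inplace_reserve(ip, blks); /* request passed directly */
        if (error)
                goto out_gunlock_q;

        /* ... build the transaction, call gfs2_alloc_blocks(), etc ... */

        gfs2_inplace_release(ip);
out_gunlock_q:
        gfs2_quota_unlock(ip);
out:
        gfs2_qadata_put(ip);                    /* was gfs2_alloc_put() */
        return error;
}
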
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 71e420989f77..4553ce515f62 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1284,18 +1284,18 @@ static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
1284/** 1284/**
1285 * gfs2_show_options - Show mount options for /proc/mounts 1285 * gfs2_show_options - Show mount options for /proc/mounts
1286 * @s: seq_file structure 1286 * @s: seq_file structure
1287 * @mnt: vfsmount 1287 * @root: root of this (sub)tree
1288 * 1288 *
1289 * Returns: 0 on success or error code 1289 * Returns: 0 on success or error code
1290 */ 1290 */
1291 1291
1292static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) 1292static int gfs2_show_options(struct seq_file *s, struct dentry *root)
1293{ 1293{
1294 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; 1294 struct gfs2_sbd *sdp = root->d_sb->s_fs_info;
1295 struct gfs2_args *args = &sdp->sd_args; 1295 struct gfs2_args *args = &sdp->sd_args;
1296 int val; 1296 int val;
1297 1297
1298 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) 1298 if (is_ancestor(root, sdp->sd_master_dir))
1299 seq_printf(s, ",meta"); 1299 seq_printf(s, ",meta");
1300 if (args->ar_lockproto[0]) 1300 if (args->ar_lockproto[0])
1301 seq_printf(s, ",lockproto=%s", args->ar_lockproto); 1301 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
@@ -1399,8 +1399,9 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
1399static int gfs2_dinode_dealloc(struct gfs2_inode *ip) 1399static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1400{ 1400{
1401 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1401 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1402 struct gfs2_alloc *al; 1402 struct gfs2_qadata *qa;
1403 struct gfs2_rgrpd *rgd; 1403 struct gfs2_rgrpd *rgd;
1404 struct gfs2_holder gh;
1404 int error; 1405 int error;
1405 1406
1406 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) { 1407 if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
@@ -1408,8 +1409,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1408 return -EIO; 1409 return -EIO;
1409 } 1410 }
1410 1411
1411 al = gfs2_alloc_get(ip); 1412 qa = gfs2_qadata_get(ip);
1412 if (!al) 1413 if (!qa)
1413 return -ENOMEM; 1414 return -ENOMEM;
1414 1415
1415 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1416 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1423,8 +1424,7 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1423 goto out_qs; 1424 goto out_qs;
1424 } 1425 }
1425 1426
1426 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 1427 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1427 &al->al_rgd_gh);
1428 if (error) 1428 if (error)
1429 goto out_qs; 1429 goto out_qs;
1430 1430
@@ -1440,11 +1440,11 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
1440 gfs2_trans_end(sdp); 1440 gfs2_trans_end(sdp);
1441 1441
1442out_rg_gunlock: 1442out_rg_gunlock:
1443 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1443 gfs2_glock_dq_uninit(&gh);
1444out_qs: 1444out_qs:
1445 gfs2_quota_unhold(ip); 1445 gfs2_quota_unhold(ip);
1446out: 1446out:
1447 gfs2_alloc_put(ip); 1447 gfs2_qadata_put(ip);
1448 return error; 1448 return error;
1449} 1449}
1450 1450
@@ -1582,7 +1582,6 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
1582static void gfs2_i_callback(struct rcu_head *head) 1582static void gfs2_i_callback(struct rcu_head *head)
1583{ 1583{
1584 struct inode *inode = container_of(head, struct inode, i_rcu); 1584 struct inode *inode = container_of(head, struct inode, i_rcu);
1585 INIT_LIST_HEAD(&inode->i_dentry);
1586 kmem_cache_free(gfs2_inode_cachep, inode); 1585 kmem_cache_free(gfs2_inode_cachep, inode);
1587} 1586}
1588 1587
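The gfs2_show_options() hunk above is one instance of a tree-wide conversion that recurs below (hfs, hfsplus, hostfs, jffs2): ->show_options() now receives the root dentry instead of a vfsmount, and the superblock is reached through root->d_sb. A minimal sketch of the new shape, using a hypothetical filesystem and option name:

static int examplefs_show_options(struct seq_file *s, struct dentry *root)
{
        struct examplefs_sb_info *sbi = root->d_sb->s_fs_info; /* was mnt->mnt_sb */

        if (sbi->some_flag)                     /* hypothetical mount option */
                seq_printf(s, ",someopt");
        return 0;
}
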
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index f8f101ef600c..125d4572e1c0 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -30,9 +30,9 @@ struct gfs2_glock;
30 * block, or all of the blocks in the rg, whichever is smaller */ 30 * block, or all of the blocks in the rg, whichever is smaller */
31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip) 31static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
32{ 32{
33 const struct gfs2_alloc *al = ip->i_alloc; 33 const struct gfs2_blkreserv *rs = ip->i_res;
34 if (al->al_requested < ip->i_rgd->rd_length) 34 if (rs->rs_requested < ip->i_rgd->rd_length)
35 return al->al_requested + 1; 35 return rs->rs_requested + 1;
36 return ip->i_rgd->rd_length; 36 return ip->i_rgd->rd_length;
37} 37}
38 38
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 71d7bf830c09..e9636591b5d5 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -321,11 +321,11 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
321 struct gfs2_ea_header *ea, 321 struct gfs2_ea_header *ea,
322 struct gfs2_ea_header *prev, int leave) 322 struct gfs2_ea_header *prev, int leave)
323{ 323{
324 struct gfs2_alloc *al; 324 struct gfs2_qadata *qa;
325 int error; 325 int error;
326 326
327 al = gfs2_alloc_get(ip); 327 qa = gfs2_qadata_get(ip);
328 if (!al) 328 if (!qa)
329 return -ENOMEM; 329 return -ENOMEM;
330 330
331 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 331 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -336,7 +336,7 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
336 336
337 gfs2_quota_unhold(ip); 337 gfs2_quota_unhold(ip);
338out_alloc: 338out_alloc:
339 gfs2_alloc_put(ip); 339 gfs2_qadata_put(ip);
340 return error; 340 return error;
341} 341}
342 342
@@ -549,9 +549,10 @@ int gfs2_xattr_acl_get(struct gfs2_inode *ip, const char *name, char **ppdata)
549 goto out; 549 goto out;
550 550
551 error = gfs2_ea_get_copy(ip, &el, data, len); 551 error = gfs2_ea_get_copy(ip, &el, data, len);
552 if (error == 0) 552 if (error < 0)
553 error = len; 553 kfree(data);
554 *ppdata = data; 554 else
555 *ppdata = data;
555out: 556out:
556 brelse(el.el_bh); 557 brelse(el.el_bh);
557 return error; 558 return error;
@@ -609,7 +610,7 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp)
609 u64 block; 610 u64 block;
610 int error; 611 int error;
611 612
612 error = gfs2_alloc_block(ip, &block, &n); 613 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
613 if (error) 614 if (error)
614 return error; 615 return error;
615 gfs2_trans_add_unrevoke(sdp, block, 1); 616 gfs2_trans_add_unrevoke(sdp, block, 1);
@@ -671,7 +672,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea,
671 int mh_size = sizeof(struct gfs2_meta_header); 672 int mh_size = sizeof(struct gfs2_meta_header);
672 unsigned int n = 1; 673 unsigned int n = 1;
673 674
674 error = gfs2_alloc_block(ip, &block, &n); 675 error = gfs2_alloc_blocks(ip, &block, &n, 0, NULL);
675 if (error) 676 if (error)
676 return error; 677 return error;
677 gfs2_trans_add_unrevoke(sdp, block, 1); 678 gfs2_trans_add_unrevoke(sdp, block, 1);
@@ -708,21 +709,19 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
708 unsigned int blks, 709 unsigned int blks,
709 ea_skeleton_call_t skeleton_call, void *private) 710 ea_skeleton_call_t skeleton_call, void *private)
710{ 711{
711 struct gfs2_alloc *al; 712 struct gfs2_qadata *qa;
712 struct buffer_head *dibh; 713 struct buffer_head *dibh;
713 int error; 714 int error;
714 715
715 al = gfs2_alloc_get(ip); 716 qa = gfs2_qadata_get(ip);
716 if (!al) 717 if (!qa)
717 return -ENOMEM; 718 return -ENOMEM;
718 719
719 error = gfs2_quota_lock_check(ip); 720 error = gfs2_quota_lock_check(ip);
720 if (error) 721 if (error)
721 goto out; 722 goto out;
722 723
723 al->al_requested = blks; 724 error = gfs2_inplace_reserve(ip, blks);
724
725 error = gfs2_inplace_reserve(ip);
726 if (error) 725 if (error)
727 goto out_gunlock_q; 726 goto out_gunlock_q;
728 727
@@ -751,7 +750,7 @@ out_ipres:
751out_gunlock_q: 750out_gunlock_q:
752 gfs2_quota_unlock(ip); 751 gfs2_quota_unlock(ip);
753out: 752out:
754 gfs2_alloc_put(ip); 753 gfs2_qadata_put(ip);
755 return error; 754 return error;
756} 755}
757 756
@@ -991,7 +990,7 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er,
991 } else { 990 } else {
992 u64 blk; 991 u64 blk;
993 unsigned int n = 1; 992 unsigned int n = 1;
994 error = gfs2_alloc_block(ip, &blk, &n); 993 error = gfs2_alloc_blocks(ip, &blk, &n, 0, NULL);
995 if (error) 994 if (error)
996 return error; 995 return error;
997 gfs2_trans_add_unrevoke(sdp, blk, 1); 996 gfs2_trans_add_unrevoke(sdp, blk, 1);
@@ -1435,9 +1434,9 @@ out:
1435static int ea_dealloc_block(struct gfs2_inode *ip) 1434static int ea_dealloc_block(struct gfs2_inode *ip)
1436{ 1435{
1437 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1436 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1438 struct gfs2_alloc *al = ip->i_alloc;
1439 struct gfs2_rgrpd *rgd; 1437 struct gfs2_rgrpd *rgd;
1440 struct buffer_head *dibh; 1438 struct buffer_head *dibh;
1439 struct gfs2_holder gh;
1441 int error; 1440 int error;
1442 1441
1443 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr); 1442 rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr);
@@ -1446,8 +1445,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1446 return -EIO; 1445 return -EIO;
1447 } 1446 }
1448 1447
1449 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, 1448 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
1450 &al->al_rgd_gh);
1451 if (error) 1449 if (error)
1452 return error; 1450 return error;
1453 1451
@@ -1471,7 +1469,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
1471 gfs2_trans_end(sdp); 1469 gfs2_trans_end(sdp);
1472 1470
1473out_gunlock: 1471out_gunlock:
1474 gfs2_glock_dq_uninit(&al->al_rgd_gh); 1472 gfs2_glock_dq_uninit(&gh);
1475 return error; 1473 return error;
1476} 1474}
1477 1475
@@ -1484,11 +1482,11 @@ out_gunlock:
1484 1482
1485int gfs2_ea_dealloc(struct gfs2_inode *ip) 1483int gfs2_ea_dealloc(struct gfs2_inode *ip)
1486{ 1484{
1487 struct gfs2_alloc *al; 1485 struct gfs2_qadata *qa;
1488 int error; 1486 int error;
1489 1487
1490 al = gfs2_alloc_get(ip); 1488 qa = gfs2_qadata_get(ip);
1491 if (!al) 1489 if (!qa)
1492 return -ENOMEM; 1490 return -ENOMEM;
1493 1491
1494 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); 1492 error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
@@ -1510,7 +1508,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
1510out_quota: 1508out_quota:
1511 gfs2_quota_unhold(ip); 1509 gfs2_quota_unhold(ip);
1512out_alloc: 1510out_alloc:
1513 gfs2_alloc_put(ip); 1511 gfs2_qadata_put(ip);
1514 return error; 1512 return error;
1515} 1513}
1516 1514
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index bce4eef91a06..62fc14ea4b73 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -186,7 +186,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
186 * a directory and return a corresponding inode, given the inode for 186 * a directory and return a corresponding inode, given the inode for
187 * the directory and the name (and its length) of the new file. 187 * the directory and the name (and its length) of the new file.
188 */ 188 */
189static int hfs_create(struct inode *dir, struct dentry *dentry, int mode, 189static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
190 struct nameidata *nd) 190 struct nameidata *nd)
191{ 191{
192 struct inode *inode; 192 struct inode *inode;
@@ -216,7 +216,7 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, int mode,
216 * in a directory, given the inode for the parent directory and the 216 * in a directory, given the inode for the parent directory and the
217 * name (and its length) of the new directory. 217 * name (and its length) of the new directory.
218 */ 218 */
219static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 219static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
220{ 220{
221 struct inode *inode; 221 struct inode *inode;
222 int res; 222 int res;
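The hfs changes above follow the same pattern as the rest of this series: directory operations that take a creation mode now take umode_t rather than int. For reference, the affected inode_operations prototypes now look like this (the filesystem name is hypothetical):

static int examplefs_create(struct inode *dir, struct dentry *dentry,
                            umode_t mode, struct nameidata *nd);
static int examplefs_mkdir(struct inode *dir, struct dentry *dentry,
                           umode_t mode);
static int examplefs_mknod(struct inode *dir, struct dentry *dentry,
                           umode_t mode, dev_t rdev);
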
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index ad97c2d58287..1bf967c6bfdc 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -184,7 +184,7 @@ extern int hfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
184extern const struct address_space_operations hfs_aops; 184extern const struct address_space_operations hfs_aops;
185extern const struct address_space_operations hfs_btree_aops; 185extern const struct address_space_operations hfs_btree_aops;
186 186
187extern struct inode *hfs_new_inode(struct inode *, struct qstr *, int); 187extern struct inode *hfs_new_inode(struct inode *, struct qstr *, umode_t);
188extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *); 188extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
189extern int hfs_write_inode(struct inode *, struct writeback_control *); 189extern int hfs_write_inode(struct inode *, struct writeback_control *);
190extern int hfs_inode_setattr(struct dentry *, struct iattr *); 190extern int hfs_inode_setattr(struct dentry *, struct iattr *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index a1a9fdcd2a00..737dbeb64320 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -169,7 +169,7 @@ const struct address_space_operations hfs_aops = {
169/* 169/*
170 * hfs_new_inode 170 * hfs_new_inode
171 */ 171 */
172struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode) 172struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode)
173{ 173{
174 struct super_block *sb = dir->i_sb; 174 struct super_block *sb = dir->i_sb;
175 struct inode *inode = new_inode(sb); 175 struct inode *inode = new_inode(sb);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1b55f704fb22..8137fb3e6780 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -133,9 +133,9 @@ static int hfs_remount(struct super_block *sb, int *flags, char *data)
133 return 0; 133 return 0;
134} 134}
135 135
136static int hfs_show_options(struct seq_file *seq, struct vfsmount *mnt) 136static int hfs_show_options(struct seq_file *seq, struct dentry *root)
137{ 137{
138 struct hfs_sb_info *sbi = HFS_SB(mnt->mnt_sb); 138 struct hfs_sb_info *sbi = HFS_SB(root->d_sb);
139 139
140 if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f)) 140 if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
141 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator); 141 seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
@@ -170,7 +170,6 @@ static struct inode *hfs_alloc_inode(struct super_block *sb)
170static void hfs_i_callback(struct rcu_head *head) 170static void hfs_i_callback(struct rcu_head *head)
171{ 171{
172 struct inode *inode = container_of(head, struct inode, i_rcu); 172 struct inode *inode = container_of(head, struct inode, i_rcu);
173 INIT_LIST_HEAD(&inode->i_dentry);
174 kmem_cache_free(hfs_inode_cachep, HFS_I(inode)); 173 kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
175} 174}
176 175
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4536cd3f15ae..88e155f895c6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -424,7 +424,7 @@ out:
424} 424}
425 425
426static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, 426static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
427 int mode, dev_t rdev) 427 umode_t mode, dev_t rdev)
428{ 428{
429 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); 429 struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
430 struct inode *inode; 430 struct inode *inode;
@@ -453,13 +453,13 @@ out:
453 return res; 453 return res;
454} 454}
455 455
456static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, 456static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
457 struct nameidata *nd) 457 struct nameidata *nd)
458{ 458{
459 return hfsplus_mknod(dir, dentry, mode, 0); 459 return hfsplus_mknod(dir, dentry, mode, 0);
460} 460}
461 461
462static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) 462static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
463{ 463{
464 return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0); 464 return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0);
465} 465}
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d7674d051f52..21a5b7fc6db4 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -402,7 +402,7 @@ void hfsplus_inode_read_fork(struct inode *, struct hfsplus_fork_raw *);
402void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *); 402void hfsplus_inode_write_fork(struct inode *, struct hfsplus_fork_raw *);
403int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *); 403int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
404int hfsplus_cat_write_inode(struct inode *); 404int hfsplus_cat_write_inode(struct inode *);
405struct inode *hfsplus_new_inode(struct super_block *, int); 405struct inode *hfsplus_new_inode(struct super_block *, umode_t);
406void hfsplus_delete_inode(struct inode *); 406void hfsplus_delete_inode(struct inode *);
407int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, 407int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
408 int datasync); 408 int datasync);
@@ -419,7 +419,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
419int hfsplus_parse_options(char *, struct hfsplus_sb_info *); 419int hfsplus_parse_options(char *, struct hfsplus_sb_info *);
420int hfsplus_parse_options_remount(char *input, int *force); 420int hfsplus_parse_options_remount(char *input, int *force);
421void hfsplus_fill_defaults(struct hfsplus_sb_info *); 421void hfsplus_fill_defaults(struct hfsplus_sb_info *);
422int hfsplus_show_options(struct seq_file *, struct vfsmount *); 422int hfsplus_show_options(struct seq_file *, struct dentry *);
423 423
424/* super.c */ 424/* super.c */
425struct inode *hfsplus_iget(struct super_block *, unsigned long); 425struct inode *hfsplus_iget(struct super_block *, unsigned long);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 40e1413be4cf..6643b242bdd7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -378,7 +378,7 @@ static const struct file_operations hfsplus_file_operations = {
378 .unlocked_ioctl = hfsplus_ioctl, 378 .unlocked_ioctl = hfsplus_ioctl,
379}; 379};
380 380
381struct inode *hfsplus_new_inode(struct super_block *sb, int mode) 381struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
382{ 382{
383 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 383 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
384 struct inode *inode = new_inode(sb); 384 struct inode *inode = new_inode(sb);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index fbaa6690c8e0..f66c7655b3f7 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -43,7 +43,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
43 unsigned int flags; 43 unsigned int flags;
44 int err = 0; 44 int err = 0;
45 45
46 err = mnt_want_write(file->f_path.mnt); 46 err = mnt_want_write_file(file);
47 if (err) 47 if (err)
48 goto out; 48 goto out;
49 49
@@ -94,7 +94,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
94out_unlock_inode: 94out_unlock_inode:
95 mutex_unlock(&inode->i_mutex); 95 mutex_unlock(&inode->i_mutex);
96out_drop_write: 96out_drop_write:
97 mnt_drop_write(file->f_path.mnt); 97 mnt_drop_write_file(file);
98out: 98out:
99 return err; 99 return err;
100} 100}
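hfsplus_ioctl_setflags() above illustrates the mnt_want_write() to mnt_want_write_file() conversion that also appears in fs/inode.c and fs/jfs/ioctl.c below: write access is requested and dropped via the struct file rather than its vfsmount. A hedged sketch of the bracketing pattern (the handler name is hypothetical):

static int examplefs_ioctl_setflags(struct file *filp, int __user *user_flags)
{
        int err;

        err = mnt_want_write_file(filp);        /* was mnt_want_write(filp->f_path.mnt) */
        if (err)
                return err;

        /* ... update the flags under inode->i_mutex ... */

        mnt_drop_write_file(filp);              /* was mnt_drop_write(filp->f_path.mnt) */
        return err;
}
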
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index bb62a5882147..06fa5618600c 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -206,9 +206,9 @@ done:
206 return 1; 206 return 1;
207} 207}
208 208
209int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) 209int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
210{ 210{
211 struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); 211 struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb);
212 212
213 if (sbi->creator != HFSPLUS_DEF_CR_TYPE) 213 if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
214 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); 214 seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index d24a9b666a23..edf0a801446b 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -558,7 +558,6 @@ static void hfsplus_i_callback(struct rcu_head *head)
558{ 558{
559 struct inode *inode = container_of(head, struct inode, i_rcu); 559 struct inode *inode = container_of(head, struct inode, i_rcu);
560 560
561 INIT_LIST_HEAD(&inode->i_dentry);
562 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); 561 kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode));
563} 562}
564 563
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index bf15a43016b9..3cbfa93cd782 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -39,7 +39,7 @@
39 39
40struct hostfs_iattr { 40struct hostfs_iattr {
41 unsigned int ia_valid; 41 unsigned int ia_valid;
42 mode_t ia_mode; 42 unsigned short ia_mode;
43 uid_t ia_uid; 43 uid_t ia_uid;
44 gid_t ia_gid; 44 gid_t ia_gid;
45 loff_t ia_size; 45 loff_t ia_size;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2f72da5ae686..e130bd46d671 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -250,7 +250,6 @@ static void hostfs_evict_inode(struct inode *inode)
250static void hostfs_i_callback(struct rcu_head *head) 250static void hostfs_i_callback(struct rcu_head *head)
251{ 251{
252 struct inode *inode = container_of(head, struct inode, i_rcu); 252 struct inode *inode = container_of(head, struct inode, i_rcu);
253 INIT_LIST_HEAD(&inode->i_dentry);
254 kfree(HOSTFS_I(inode)); 253 kfree(HOSTFS_I(inode));
255} 254}
256 255
@@ -259,9 +258,9 @@ static void hostfs_destroy_inode(struct inode *inode)
259 call_rcu(&inode->i_rcu, hostfs_i_callback); 258 call_rcu(&inode->i_rcu, hostfs_i_callback);
260} 259}
261 260
262static int hostfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 261static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
263{ 262{
264 const char *root_path = vfs->mnt_sb->s_fs_info; 263 const char *root_path = root->d_sb->s_fs_info;
265 size_t offset = strlen(root_ino) + 1; 264 size_t offset = strlen(root_ino) + 1;
266 265
267 if (strlen(root_path) > offset) 266 if (strlen(root_path) > offset)
@@ -552,7 +551,7 @@ static int read_name(struct inode *ino, char *name)
552 return 0; 551 return 0;
553} 552}
554 553
555int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, 554int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
556 struct nameidata *nd) 555 struct nameidata *nd)
557{ 556{
558 struct inode *inode; 557 struct inode *inode;
@@ -677,7 +676,7 @@ int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
677 return err; 676 return err;
678} 677}
679 678
680int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) 679int hostfs_mkdir(struct inode *ino, struct dentry *dentry, umode_t mode)
681{ 680{
682 char *file; 681 char *file;
683 int err; 682 int err;
@@ -701,7 +700,7 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
701 return err; 700 return err;
702} 701}
703 702
704int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 703static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
705{ 704{
706 struct inode *inode; 705 struct inode *inode;
707 char *name; 706 char *name;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index ea91fcb0ef9b..30dd7b10b507 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -8,7 +8,7 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include "hpfs_fn.h" 9#include "hpfs_fn.h"
10 10
11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
12{ 12{
13 const unsigned char *name = dentry->d_name.name; 13 const unsigned char *name = dentry->d_name.name;
14 unsigned len = dentry->d_name.len; 14 unsigned len = dentry->d_name.len;
@@ -115,7 +115,7 @@ bail:
115 return err; 115 return err;
116} 116}
117 117
118static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 118static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
119{ 119{
120 const unsigned char *name = dentry->d_name.name; 120 const unsigned char *name = dentry->d_name.name;
121 unsigned len = dentry->d_name.len; 121 unsigned len = dentry->d_name.len;
@@ -201,7 +201,7 @@ bail:
201 return err; 201 return err;
202} 202}
203 203
204static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 204static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
205{ 205{
206 const unsigned char *name = dentry->d_name.name; 206 const unsigned char *name = dentry->d_name.name;
207 unsigned len = dentry->d_name.len; 207 unsigned len = dentry->d_name.len;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 98580a3b5005..3690467c944e 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -181,7 +181,6 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
181static void hpfs_i_callback(struct rcu_head *head) 181static void hpfs_i_callback(struct rcu_head *head)
182{ 182{
183 struct inode *inode = container_of(head, struct inode, i_rcu); 183 struct inode *inode = container_of(head, struct inode, i_rcu);
184 INIT_LIST_HEAD(&inode->i_dentry);
185 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); 184 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
186} 185}
187 186
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index f590b1160c6c..d92f4ce80925 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -622,7 +622,6 @@ void hppfs_evict_inode(struct inode *ino)
622static void hppfs_i_callback(struct rcu_head *head) 622static void hppfs_i_callback(struct rcu_head *head)
623{ 623{
624 struct inode *inode = container_of(head, struct inode, i_rcu); 624 struct inode *inode = container_of(head, struct inode, i_rcu);
625 INIT_LIST_HEAD(&inode->i_dentry);
626 kfree(HPPFS_I(inode)); 625 kfree(HPPFS_I(inode));
627} 626}
628 627
@@ -726,7 +725,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
726 sb->s_fs_info = proc_mnt; 725 sb->s_fs_info = proc_mnt;
727 726
728 err = -ENOMEM; 727 err = -ENOMEM;
729 root_inode = get_inode(sb, dget(proc_mnt->mnt_sb->s_root)); 728 root_inode = get_inode(sb, dget(proc_mnt->mnt_root));
730 if (!root_inode) 729 if (!root_inode)
731 goto out_mntput; 730 goto out_mntput;
732 731
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0be5a78598d0..e425ad9d0490 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -447,8 +447,8 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
447 return 0; 447 return 0;
448} 448}
449 449
450static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 450static struct inode *hugetlbfs_get_root(struct super_block *sb,
451 gid_t gid, int mode, dev_t dev) 451 struct hugetlbfs_config *config)
452{ 452{
453 struct inode *inode; 453 struct inode *inode;
454 454
@@ -456,9 +456,31 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
456 if (inode) { 456 if (inode) {
457 struct hugetlbfs_inode_info *info; 457 struct hugetlbfs_inode_info *info;
458 inode->i_ino = get_next_ino(); 458 inode->i_ino = get_next_ino();
459 inode->i_mode = mode; 459 inode->i_mode = S_IFDIR | config->mode;
460 inode->i_uid = uid; 460 inode->i_uid = config->uid;
461 inode->i_gid = gid; 461 inode->i_gid = config->gid;
462 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
463 info = HUGETLBFS_I(inode);
464 mpol_shared_policy_init(&info->policy, NULL);
465 inode->i_op = &hugetlbfs_dir_inode_operations;
466 inode->i_fop = &simple_dir_operations;
467 /* directory inodes start off with i_nlink == 2 (for "." entry) */
468 inc_nlink(inode);
469 }
470 return inode;
471}
472
473static struct inode *hugetlbfs_get_inode(struct super_block *sb,
474 struct inode *dir,
475 umode_t mode, dev_t dev)
476{
477 struct inode *inode;
478
479 inode = new_inode(sb);
480 if (inode) {
481 struct hugetlbfs_inode_info *info;
482 inode->i_ino = get_next_ino();
483 inode_init_owner(inode, dir, mode);
462 inode->i_mapping->a_ops = &hugetlbfs_aops; 484 inode->i_mapping->a_ops = &hugetlbfs_aops;
463 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 485 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
464 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 486 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -500,20 +522,12 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
500 * File creation. Allocate an inode, and we're done.. 522 * File creation. Allocate an inode, and we're done..
501 */ 523 */
502static int hugetlbfs_mknod(struct inode *dir, 524static int hugetlbfs_mknod(struct inode *dir,
503 struct dentry *dentry, int mode, dev_t dev) 525 struct dentry *dentry, umode_t mode, dev_t dev)
504{ 526{
505 struct inode *inode; 527 struct inode *inode;
506 int error = -ENOSPC; 528 int error = -ENOSPC;
507 gid_t gid; 529
508 530 inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
509 if (dir->i_mode & S_ISGID) {
510 gid = dir->i_gid;
511 if (S_ISDIR(mode))
512 mode |= S_ISGID;
513 } else {
514 gid = current_fsgid();
515 }
516 inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), gid, mode, dev);
517 if (inode) { 531 if (inode) {
518 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 532 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
519 d_instantiate(dentry, inode); 533 d_instantiate(dentry, inode);
@@ -523,7 +537,7 @@ static int hugetlbfs_mknod(struct inode *dir,
523 return error; 537 return error;
524} 538}
525 539
526static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 540static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
527{ 541{
528 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 542 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
529 if (!retval) 543 if (!retval)
@@ -531,7 +545,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
531 return retval; 545 return retval;
532} 546}
533 547
534static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 548static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
535{ 549{
536 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 550 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
537} 551}
@@ -541,15 +555,8 @@ static int hugetlbfs_symlink(struct inode *dir,
541{ 555{
542 struct inode *inode; 556 struct inode *inode;
543 int error = -ENOSPC; 557 int error = -ENOSPC;
544 gid_t gid;
545
546 if (dir->i_mode & S_ISGID)
547 gid = dir->i_gid;
548 else
549 gid = current_fsgid();
550 558
551 inode = hugetlbfs_get_inode(dir->i_sb, current_fsuid(), 559 inode = hugetlbfs_get_inode(dir->i_sb, dir, S_IFLNK|S_IRWXUGO, 0);
552 gid, S_IFLNK|S_IRWXUGO, 0);
553 if (inode) { 560 if (inode) {
554 int l = strlen(symname)+1; 561 int l = strlen(symname)+1;
555 error = page_symlink(inode, symname, l); 562 error = page_symlink(inode, symname, l);
@@ -666,7 +673,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
666static void hugetlbfs_i_callback(struct rcu_head *head) 673static void hugetlbfs_i_callback(struct rcu_head *head)
667{ 674{
668 struct inode *inode = container_of(head, struct inode, i_rcu); 675 struct inode *inode = container_of(head, struct inode, i_rcu);
669 INIT_LIST_HEAD(&inode->i_dentry);
670 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 676 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
671} 677}
672 678
@@ -858,8 +864,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
858 sb->s_magic = HUGETLBFS_MAGIC; 864 sb->s_magic = HUGETLBFS_MAGIC;
859 sb->s_op = &hugetlbfs_ops; 865 sb->s_op = &hugetlbfs_ops;
860 sb->s_time_gran = 1; 866 sb->s_time_gran = 1;
861 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 867 inode = hugetlbfs_get_root(sb, &config);
862 S_IFDIR | config.mode, 0);
863 if (!inode) 868 if (!inode)
864 goto out_free; 869 goto out_free;
865 870
@@ -957,8 +962,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
957 962
958 path.mnt = mntget(hugetlbfs_vfsmount); 963 path.mnt = mntget(hugetlbfs_vfsmount);
959 error = -ENOSPC; 964 error = -ENOSPC;
960 inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(), 965 inode = hugetlbfs_get_inode(root->d_sb, NULL, S_IFREG | S_IRWXUGO, 0);
961 current_fsgid(), S_IFREG | S_IRWXUGO, 0);
962 if (!inode) 966 if (!inode)
963 goto out_dentry; 967 goto out_dentry;
964 968
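The hugetlbfs rework above splits root-inode creation out into hugetlbfs_get_root() and lets inode_init_owner() pick the owner and apply S_ISGID inheritance, replacing the hand-rolled uid/gid logic that was removed. A minimal sketch of the resulting allocation helper (names hypothetical, details trimmed):

static struct inode *examplefs_get_inode(struct super_block *sb,
                                         struct inode *dir, umode_t mode)
{
        struct inode *inode = new_inode(sb);

        if (inode) {
                inode->i_ino = get_next_ino();
                inode_init_owner(inode, dir, mode);     /* handles S_ISGID parents */
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        }
        return inode;
}
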
diff --git a/fs/inode.c b/fs/inode.c
index ee4e66b998f4..87535753ab04 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -26,6 +26,7 @@
26#include <linux/ima.h> 26#include <linux/ima.h>
27#include <linux/cred.h> 27#include <linux/cred.h>
28#include <linux/buffer_head.h> /* for inode_has_buffers */ 28#include <linux/buffer_head.h> /* for inode_has_buffers */
29#include <linux/ratelimit.h>
29#include "internal.h" 30#include "internal.h"
30 31
31/* 32/*
@@ -191,6 +192,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
191 } 192 }
192 inode->i_private = NULL; 193 inode->i_private = NULL;
193 inode->i_mapping = mapping; 194 inode->i_mapping = mapping;
195 INIT_LIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */
194#ifdef CONFIG_FS_POSIX_ACL 196#ifdef CONFIG_FS_POSIX_ACL
195 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; 197 inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
196#endif 198#endif
@@ -241,6 +243,11 @@ void __destroy_inode(struct inode *inode)
241 BUG_ON(inode_has_buffers(inode)); 243 BUG_ON(inode_has_buffers(inode));
242 security_inode_free(inode); 244 security_inode_free(inode);
243 fsnotify_inode_delete(inode); 245 fsnotify_inode_delete(inode);
246 if (!inode->i_nlink) {
247 WARN_ON(atomic_long_read(&inode->i_sb->s_remove_count) == 0);
248 atomic_long_dec(&inode->i_sb->s_remove_count);
249 }
250
244#ifdef CONFIG_FS_POSIX_ACL 251#ifdef CONFIG_FS_POSIX_ACL
245 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) 252 if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED)
246 posix_acl_release(inode->i_acl); 253 posix_acl_release(inode->i_acl);
@@ -254,7 +261,6 @@ EXPORT_SYMBOL(__destroy_inode);
254static void i_callback(struct rcu_head *head) 261static void i_callback(struct rcu_head *head)
255{ 262{
256 struct inode *inode = container_of(head, struct inode, i_rcu); 263 struct inode *inode = container_of(head, struct inode, i_rcu);
257 INIT_LIST_HEAD(&inode->i_dentry);
258 kmem_cache_free(inode_cachep, inode); 264 kmem_cache_free(inode_cachep, inode);
259} 265}
260 266
@@ -268,6 +274,85 @@ static void destroy_inode(struct inode *inode)
268 call_rcu(&inode->i_rcu, i_callback); 274 call_rcu(&inode->i_rcu, i_callback);
269} 275}
270 276
277/**
278 * drop_nlink - directly drop an inode's link count
279 * @inode: inode
280 *
281 * This is a low-level filesystem helper to replace any
282 * direct filesystem manipulation of i_nlink. In cases
283 * where we are attempting to track writes to the
284 * filesystem, a decrement to zero means an imminent
285 * write when the file is truncated and actually unlinked
286 * on the filesystem.
287 */
288void drop_nlink(struct inode *inode)
289{
290 WARN_ON(inode->i_nlink == 0);
291 inode->__i_nlink--;
292 if (!inode->i_nlink)
293 atomic_long_inc(&inode->i_sb->s_remove_count);
294}
295EXPORT_SYMBOL(drop_nlink);
296
297/**
298 * clear_nlink - directly zero an inode's link count
299 * @inode: inode
300 *
301 * This is a low-level filesystem helper to replace any
302 * direct filesystem manipulation of i_nlink. See
303 * drop_nlink() for why we care about i_nlink hitting zero.
304 */
305void clear_nlink(struct inode *inode)
306{
307 if (inode->i_nlink) {
308 inode->__i_nlink = 0;
309 atomic_long_inc(&inode->i_sb->s_remove_count);
310 }
311}
312EXPORT_SYMBOL(clear_nlink);
313
314/**
315 * set_nlink - directly set an inode's link count
316 * @inode: inode
317 * @nlink: new nlink (should be non-zero)
318 *
319 * This is a low-level filesystem helper to replace any
320 * direct filesystem manipulation of i_nlink.
321 */
322void set_nlink(struct inode *inode, unsigned int nlink)
323{
324 if (!nlink) {
325 printk_ratelimited(KERN_INFO
326 "set_nlink() clearing i_nlink on %s inode %li\n",
327 inode->i_sb->s_type->name, inode->i_ino);
328 clear_nlink(inode);
329 } else {
330 /* Yes, some filesystems do change nlink from zero to one */
331 if (inode->i_nlink == 0)
332 atomic_long_dec(&inode->i_sb->s_remove_count);
333
334 inode->__i_nlink = nlink;
335 }
336}
337EXPORT_SYMBOL(set_nlink);
338
339/**
340 * inc_nlink - directly increment an inode's link count
341 * @inode: inode
342 *
343 * This is a low-level filesystem helper to replace any
344 * direct filesystem manipulation of i_nlink. Currently,
345 * it is only here for parity with dec_nlink().
346 */
347void inc_nlink(struct inode *inode)
348{
349 if (WARN_ON(inode->i_nlink == 0))
350 atomic_long_dec(&inode->i_sb->s_remove_count);
351
352 inode->__i_nlink++;
353}
354EXPORT_SYMBOL(inc_nlink);
355
271void address_space_init_once(struct address_space *mapping) 356void address_space_init_once(struct address_space *mapping)
272{ 357{
273 memset(mapping, 0, sizeof(*mapping)); 358 memset(mapping, 0, sizeof(*mapping));
@@ -290,7 +375,6 @@ void inode_init_once(struct inode *inode)
290{ 375{
291 memset(inode, 0, sizeof(*inode)); 376 memset(inode, 0, sizeof(*inode));
292 INIT_HLIST_NODE(&inode->i_hash); 377 INIT_HLIST_NODE(&inode->i_hash);
293 INIT_LIST_HEAD(&inode->i_dentry);
294 INIT_LIST_HEAD(&inode->i_devices); 378 INIT_LIST_HEAD(&inode->i_devices);
295 INIT_LIST_HEAD(&inode->i_wb_list); 379 INIT_LIST_HEAD(&inode->i_wb_list);
296 INIT_LIST_HEAD(&inode->i_lru); 380 INIT_LIST_HEAD(&inode->i_lru);
@@ -1508,7 +1592,7 @@ void file_update_time(struct file *file)
1508 if (sync_it & S_MTIME) 1592 if (sync_it & S_MTIME)
1509 inode->i_mtime = now; 1593 inode->i_mtime = now;
1510 mark_inode_dirty_sync(inode); 1594 mark_inode_dirty_sync(inode);
1511 mnt_drop_write(file->f_path.mnt); 1595 mnt_drop_write_file(file);
1512} 1596}
1513EXPORT_SYMBOL(file_update_time); 1597EXPORT_SYMBOL(file_update_time);
1514 1598
@@ -1647,7 +1731,7 @@ EXPORT_SYMBOL(init_special_inode);
1647 * @mode: mode of the new inode 1731 * @mode: mode of the new inode
1648 */ 1732 */
1649void inode_init_owner(struct inode *inode, const struct inode *dir, 1733void inode_init_owner(struct inode *inode, const struct inode *dir,
1650 mode_t mode) 1734 umode_t mode)
1651{ 1735{
1652 inode->i_uid = current_fsuid(); 1736 inode->i_uid = current_fsuid();
1653 if (dir && dir->i_mode & S_ISGID) { 1737 if (dir && dir->i_mode & S_ISGID) {
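The new set_nlink()/inc_nlink()/drop_nlink()/clear_nlink() helpers documented above exist so that filesystems stop writing i_nlink directly and the VFS can keep s_remove_count accurate across the zero crossing. A hedged sketch of how a filesystem is expected to use them (the wrapper names are hypothetical):

static void examplefs_set_links_from_disk(struct inode *inode,
                                          unsigned int disk_nlink)
{
        set_nlink(inode, disk_nlink);   /* was: inode->i_nlink = disk_nlink; */
}

static void examplefs_on_link(struct inode *inode)
{
        inc_nlink(inode);               /* was: inode->i_nlink++; */
}

static void examplefs_on_unlink(struct inode *inode)
{
        drop_nlink(inode);              /* was: inode->i_nlink--; */
}
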
diff --git a/fs/internal.h b/fs/internal.h
index fe327c20af83..9962c59ba280 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -15,19 +15,14 @@ struct super_block;
15struct file_system_type; 15struct file_system_type;
16struct linux_binprm; 16struct linux_binprm;
17struct path; 17struct path;
18struct mount;
18 19
19/* 20/*
20 * block_dev.c 21 * block_dev.c
21 */ 22 */
22#ifdef CONFIG_BLOCK 23#ifdef CONFIG_BLOCK
23extern struct super_block *blockdev_superblock;
24extern void __init bdev_cache_init(void); 24extern void __init bdev_cache_init(void);
25 25
26static inline int sb_is_blkdev_sb(struct super_block *sb)
27{
28 return sb == blockdev_superblock;
29}
30
31extern int __sync_blockdev(struct block_device *bdev, int wait); 26extern int __sync_blockdev(struct block_device *bdev, int wait);
32 27
33#else 28#else
@@ -35,11 +30,6 @@ static inline void bdev_cache_init(void)
35{ 30{
36} 31}
37 32
38static inline int sb_is_blkdev_sb(struct super_block *sb)
39{
40 return 0;
41}
42
43static inline int __sync_blockdev(struct block_device *bdev, int wait) 33static inline int __sync_blockdev(struct block_device *bdev, int wait)
44{ 34{
45 return 0; 35 return 0;
@@ -52,28 +42,17 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait)
52extern void __init chrdev_init(void); 42extern void __init chrdev_init(void);
53 43
54/* 44/*
55 * exec.c
56 */
57extern int check_unsafe_exec(struct linux_binprm *);
58
59/*
60 * namespace.c 45 * namespace.c
61 */ 46 */
62extern int copy_mount_options(const void __user *, unsigned long *); 47extern int copy_mount_options(const void __user *, unsigned long *);
63extern int copy_mount_string(const void __user *, char **); 48extern int copy_mount_string(const void __user *, char **);
64 49
65extern unsigned int mnt_get_count(struct vfsmount *mnt);
66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern struct vfsmount *lookup_mnt(struct path *); 50extern struct vfsmount *lookup_mnt(struct path *);
68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
69 struct vfsmount *);
70extern void release_mounts(struct list_head *);
71extern void umount_tree(struct vfsmount *, int, struct list_head *);
72extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
73extern int finish_automount(struct vfsmount *, struct path *); 51extern int finish_automount(struct vfsmount *, struct path *);
74 52
75extern void mnt_make_longterm(struct vfsmount *); 53extern void mnt_make_longterm(struct vfsmount *);
76extern void mnt_make_shortterm(struct vfsmount *); 54extern void mnt_make_shortterm(struct vfsmount *);
55extern int sb_prepare_remount_readonly(struct super_block *);
77 56
78extern void __init mnt_init(void); 57extern void __init mnt_init(void);
79 58
@@ -98,10 +77,9 @@ extern struct file *get_empty_filp(void);
98 */ 77 */
99extern int do_remount_sb(struct super_block *, int, void *, int); 78extern int do_remount_sb(struct super_block *, int, void *, int);
100extern bool grab_super_passive(struct super_block *sb); 79extern bool grab_super_passive(struct super_block *sb);
101extern void __put_super(struct super_block *sb);
102extern void put_super(struct super_block *sb);
103extern struct dentry *mount_fs(struct file_system_type *, 80extern struct dentry *mount_fs(struct file_system_type *,
104 int, const char *, void *); 81 int, const char *, void *);
82extern struct super_block *user_get_super(dev_t);
105 83
106/* 84/*
107 * open.c 85 * open.c
@@ -111,7 +89,7 @@ extern struct file *nameidata_to_filp(struct nameidata *);
111extern void release_open_intent(struct nameidata *); 89extern void release_open_intent(struct nameidata *);
112struct open_flags { 90struct open_flags {
113 int open_flag; 91 int open_flag;
114 int mode; 92 umode_t mode;
115 int acc_mode; 93 int acc_mode;
116 int intent; 94 int intent;
117}; 95};
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1d9b9fcb2db4..066836e81848 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -42,7 +42,7 @@ static long vfs_ioctl(struct file *filp, unsigned int cmd,
42 42
43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg); 43 error = filp->f_op->unlocked_ioctl(filp, cmd, arg);
44 if (error == -ENOIOCTLCMD) 44 if (error == -ENOIOCTLCMD)
45 error = -EINVAL; 45 error = -ENOTTY;
46 out: 46 out:
47 return error; 47 return error;
48} 48}
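With the vfs_ioctl() change above, a handler that returns -ENOIOCTLCMD for an unrecognised command now has that translated to -ENOTTY for userspace instead of -EINVAL. A sketch of the expected handler behaviour (the command constant and function name are hypothetical):

static long examplefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        switch (cmd) {
        case EXAMPLEFS_IOC_GETFLAGS:            /* hypothetical command */
                return 0;
        default:
                return -ENOIOCTLCMD;            /* the VFS reports -ENOTTY */
        }
}
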
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index f950059525fc..7b99f5f460be 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -85,7 +85,6 @@ static struct inode *isofs_alloc_inode(struct super_block *sb)
85static void isofs_i_callback(struct rcu_head *head) 85static void isofs_i_callback(struct rcu_head *head)
86{ 86{
87 struct inode *inode = container_of(head, struct inode, i_rcu); 87 struct inode *inode = container_of(head, struct inode, i_rcu);
88 INIT_LIST_HEAD(&inode->i_dentry);
89 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 88 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
90} 89}
91 90
@@ -170,8 +169,8 @@ struct iso9660_options{
170 unsigned char map; 169 unsigned char map;
171 unsigned char check; 170 unsigned char check;
172 unsigned int blocksize; 171 unsigned int blocksize;
173 mode_t fmode; 172 umode_t fmode;
174 mode_t dmode; 173 umode_t dmode;
175 gid_t gid; 174 gid_t gid;
176 uid_t uid; 175 uid_t uid;
177 char *iocharset; 176 char *iocharset;
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 7d33de84f52a..0e73f63d9274 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -50,14 +50,14 @@ struct isofs_sb_info {
50 unsigned int s_uid_set:1; 50 unsigned int s_uid_set:1;
51 unsigned int s_gid_set:1; 51 unsigned int s_gid_set:1;
52 52
53 mode_t s_fmode; 53 umode_t s_fmode;
54 mode_t s_dmode; 54 umode_t s_dmode;
55 gid_t s_gid; 55 gid_t s_gid;
56 uid_t s_uid; 56 uid_t s_uid;
57 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
58}; 58};
59 59
60#define ISOFS_INVALID_MODE ((mode_t) -1) 60#define ISOFS_INVALID_MODE ((umode_t) -1)
61 61
62static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb) 62static inline struct isofs_sb_info *ISOFS_SB(struct super_block *sb)
63{ 63{
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index f94fc48ff3a0..5d1a00a5041b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -537,7 +537,7 @@ int cleanup_journal_tail(journal_t *journal)
537 * them. 537 * them.
538 * 538 *
539 * Called with j_list_lock held. 539 * Called with j_list_lock held.
540 * Returns number of bufers reaped (for debug) 540 * Returns number of buffers reaped (for debug)
541 */ 541 */
542 542
543static int journal_clean_one_cp_list(struct journal_head *jh, int *released) 543static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index fea8dd661d2b..a96cff0c5f1d 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -166,7 +166,7 @@ loop:
166 */ 166 */
167 jbd_debug(1, "Now suspending kjournald\n"); 167 jbd_debug(1, "Now suspending kjournald\n");
168 spin_unlock(&journal->j_state_lock); 168 spin_unlock(&journal->j_state_lock);
169 refrigerator(); 169 try_to_freeze();
170 spin_lock(&journal->j_state_lock); 170 spin_lock(&journal->j_state_lock);
171 } else { 171 } else {
172 /* 172 /*
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 16a698bd906d..d49d202903fb 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -565,7 +565,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
565 * 565 *
566 * Called with the journal locked. 566 * Called with the journal locked.
567 * Called with j_list_lock held. 567 * Called with j_list_lock held.
568 * Returns number of bufers reaped (for debug) 568 * Returns number of buffers reaped (for debug)
569 */ 569 */
570 570
571static int journal_clean_one_cp_list(struct journal_head *jh, int *released) 571static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0fa0123151d3..c0a5f9f1b127 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -173,7 +173,7 @@ loop:
173 */ 173 */
174 jbd_debug(1, "Now suspending kjournald2\n"); 174 jbd_debug(1, "Now suspending kjournald2\n");
175 write_unlock(&journal->j_state_lock); 175 write_unlock(&journal->j_state_lock);
176 refrigerator(); 176 try_to_freeze();
177 write_lock(&journal->j_state_lock); 177 write_lock(&journal->j_state_lock);
178 } else { 178 } else {
179 /* 179 /*
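The kjournald/kjournald2 hunks above (and the jfs daemons below) switch from refrigerator() to try_to_freeze() inside the standard freezable-kthread loop. A hedged sketch of that loop for a hypothetical daemon, assuming the usual kthread helpers:

static int examplefs_daemon(void *arg)
{
        while (!kthread_should_stop()) {
                if (freezing(current)) {
                        try_to_freeze();        /* was: refrigerator(); */
                } else {
                        set_current_state(TASK_INTERRUPTIBLE);
                        schedule();
                        __set_current_state(TASK_RUNNING);
                }
        }
        return 0;
}
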
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index be6169bd8acd..973ac5822bd7 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -22,16 +22,16 @@
22 22
23static int jffs2_readdir (struct file *, void *, filldir_t); 23static int jffs2_readdir (struct file *, void *, filldir_t);
24 24
25static int jffs2_create (struct inode *,struct dentry *,int, 25static int jffs2_create (struct inode *,struct dentry *,umode_t,
26 struct nameidata *); 26 struct nameidata *);
27static struct dentry *jffs2_lookup (struct inode *,struct dentry *, 27static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
28 struct nameidata *); 28 struct nameidata *);
29static int jffs2_link (struct dentry *,struct inode *,struct dentry *); 29static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
30static int jffs2_unlink (struct inode *,struct dentry *); 30static int jffs2_unlink (struct inode *,struct dentry *);
31static int jffs2_symlink (struct inode *,struct dentry *,const char *); 31static int jffs2_symlink (struct inode *,struct dentry *,const char *);
32static int jffs2_mkdir (struct inode *,struct dentry *,int); 32static int jffs2_mkdir (struct inode *,struct dentry *,umode_t);
33static int jffs2_rmdir (struct inode *,struct dentry *); 33static int jffs2_rmdir (struct inode *,struct dentry *);
34static int jffs2_mknod (struct inode *,struct dentry *,int,dev_t); 34static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
35static int jffs2_rename (struct inode *, struct dentry *, 35static int jffs2_rename (struct inode *, struct dentry *,
36 struct inode *, struct dentry *); 36 struct inode *, struct dentry *);
37 37
@@ -169,8 +169,8 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir)
169/***********************************************************************/ 169/***********************************************************************/
170 170
171 171
172static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode, 172static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
173 struct nameidata *nd) 173 umode_t mode, struct nameidata *nd)
174{ 174{
175 struct jffs2_raw_inode *ri; 175 struct jffs2_raw_inode *ri;
176 struct jffs2_inode_info *f, *dir_f; 176 struct jffs2_inode_info *f, *dir_f;
@@ -450,7 +450,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
450} 450}
451 451
452 452
453static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode) 453static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode)
454{ 454{
455 struct jffs2_inode_info *f, *dir_f; 455 struct jffs2_inode_info *f, *dir_f;
456 struct jffs2_sb_info *c; 456 struct jffs2_sb_info *c;
@@ -618,7 +618,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
618 return ret; 618 return ret;
619} 619}
620 620
621static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, dev_t rdev) 621static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode, dev_t rdev)
622{ 622{
623 struct jffs2_inode_info *f, *dir_f; 623 struct jffs2_inode_info *f, *dir_f;
624 struct jffs2_sb_info *c; 624 struct jffs2_sb_info *c;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index e7e974454115..8be4925296cf 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -45,7 +45,6 @@ static struct inode *jffs2_alloc_inode(struct super_block *sb)
45static void jffs2_i_callback(struct rcu_head *head) 45static void jffs2_i_callback(struct rcu_head *head)
46{ 46{
47 struct inode *inode = container_of(head, struct inode, i_rcu); 47 struct inode *inode = container_of(head, struct inode, i_rcu);
48 INIT_LIST_HEAD(&inode->i_dentry);
49 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); 48 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
50} 49}
51 50
@@ -97,9 +96,9 @@ static const char *jffs2_compr_name(unsigned int compr)
97 } 96 }
98} 97}
99 98
100static int jffs2_show_options(struct seq_file *s, struct vfsmount *mnt) 99static int jffs2_show_options(struct seq_file *s, struct dentry *root)
101{ 100{
102 struct jffs2_sb_info *c = JFFS2_SB_INFO(mnt->mnt_sb); 101 struct jffs2_sb_info *c = JFFS2_SB_INFO(root->d_sb);
103 struct jffs2_mount_opts *opts = &c->mount_opts; 102 struct jffs2_mount_opts *opts = &c->mount_opts;
104 103
105 if (opts->override_compr) 104 if (opts->override_compr)
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 6f98a1866776..f19d1e04a374 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -68,7 +68,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
68 unsigned int oldflags; 68 unsigned int oldflags;
69 int err; 69 int err;
70 70
71 err = mnt_want_write(filp->f_path.mnt); 71 err = mnt_want_write_file(filp);
72 if (err) 72 if (err)
73 return err; 73 return err;
74 74
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
120 inode->i_ctime = CURRENT_TIME_SEC; 120 inode->i_ctime = CURRENT_TIME_SEC;
121 mark_inode_dirty(inode); 121 mark_inode_dirty(inode);
122setflags_out: 122setflags_out:
123 mnt_drop_write(filp->f_path.mnt); 123 mnt_drop_write_file(filp);
124 return err; 124 return err;
125 } 125 }
126 default: 126 default:
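
The jfs_ioctl hunks above switch from the mount-based helpers to the file-based ones, mnt_want_write_file()/mnt_drop_write_file(), whose definitions appear further down in the fs/namespace.c part of this diff. A sketch of the resulting ioctl pattern (examplefs_ioctl and the elided flag update are illustrative only):

#include <linux/fs.h>
#include <linux/mount.h>

/* Sketch of the write-access pattern in an ioctl handler after the
 * conversion above. */
static long examplefs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	int err;

	err = mnt_want_write_file(filp);	/* was mnt_want_write(filp->f_path.mnt) */
	if (err)
		return err;

	/* ... update inode flags while holding the write reference ... */
	inode->i_ctime = CURRENT_TIME_SEC;
	mark_inode_dirty(inode);

	mnt_drop_write_file(filp);		/* was mnt_drop_write(filp->f_path.mnt) */
	return 0;
}
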
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cc5f811ed383..2eb952c41a69 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2349,7 +2349,7 @@ int jfsIOWait(void *arg)
2349 2349
2350 if (freezing(current)) { 2350 if (freezing(current)) {
2351 spin_unlock_irq(&log_redrive_lock); 2351 spin_unlock_irq(&log_redrive_lock);
2352 refrigerator(); 2352 try_to_freeze();
2353 } else { 2353 } else {
2354 set_current_state(TASK_INTERRUPTIBLE); 2354 set_current_state(TASK_INTERRUPTIBLE);
2355 spin_unlock_irq(&log_redrive_lock); 2355 spin_unlock_irq(&log_redrive_lock);
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index af9606057dde..bb8b661bcc50 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2800,7 +2800,7 @@ int jfs_lazycommit(void *arg)
2800 2800
2801 if (freezing(current)) { 2801 if (freezing(current)) {
2802 LAZY_UNLOCK(flags); 2802 LAZY_UNLOCK(flags);
2803 refrigerator(); 2803 try_to_freeze();
2804 } else { 2804 } else {
2805 DECLARE_WAITQUEUE(wq, current); 2805 DECLARE_WAITQUEUE(wq, current);
2806 2806
@@ -2994,7 +2994,7 @@ int jfs_sync(void *arg)
2994 2994
2995 if (freezing(current)) { 2995 if (freezing(current)) {
2996 TXN_UNLOCK(); 2996 TXN_UNLOCK();
2997 refrigerator(); 2997 try_to_freeze();
2998 } else { 2998 } else {
2999 set_current_state(TASK_INTERRUPTIBLE); 2999 set_current_state(TASK_INTERRUPTIBLE);
3000 TXN_UNLOCK(); 3000 TXN_UNLOCK();
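
The jfs kernel-thread hunks above (jfsIOWait, jfs_lazycommit, jfs_sync) replace the old refrigerator() call with try_to_freeze(). An illustrative freezable-thread loop with that change applied (examplefs_worker is hypothetical, not part of the patch):

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

/* Illustration only: a freezable kernel thread after the change above. */
static int examplefs_worker(void *arg)
{
	while (!kthread_should_stop()) {
		if (freezing(current)) {
			try_to_freeze();	/* was: refrigerator(); */
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}
	}
	return 0;
}
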
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a112ad96e474..5f7c160ea64f 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -72,7 +72,7 @@ static inline void free_ea_wmap(struct inode *inode)
72 * RETURN: Errors from subroutines 72 * RETURN: Errors from subroutines
73 * 73 *
74 */ 74 */
75static int jfs_create(struct inode *dip, struct dentry *dentry, int mode, 75static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
76 struct nameidata *nd) 76 struct nameidata *nd)
77{ 77{
78 int rc = 0; 78 int rc = 0;
@@ -205,7 +205,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
205 * note: 205 * note:
206 * EACCESS: user needs search+write permission on the parent directory 206 * EACCESS: user needs search+write permission on the parent directory
207 */ 207 */
208static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode) 208static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
209{ 209{
210 int rc = 0; 210 int rc = 0;
211 tid_t tid; /* transaction id */ 211 tid_t tid; /* transaction id */
@@ -1353,7 +1353,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1353 * FUNCTION: Create a special file (device) 1353 * FUNCTION: Create a special file (device)
1354 */ 1354 */
1355static int jfs_mknod(struct inode *dir, struct dentry *dentry, 1355static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1356 int mode, dev_t rdev) 1356 umode_t mode, dev_t rdev)
1357{ 1357{
1358 struct jfs_inode_info *jfs_ip; 1358 struct jfs_inode_info *jfs_ip;
1359 struct btstack btstack; 1359 struct btstack btstack;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index a44eff076c17..682bca642f38 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -119,7 +119,6 @@ static void jfs_i_callback(struct rcu_head *head)
119{ 119{
120 struct inode *inode = container_of(head, struct inode, i_rcu); 120 struct inode *inode = container_of(head, struct inode, i_rcu);
121 struct jfs_inode_info *ji = JFS_IP(inode); 121 struct jfs_inode_info *ji = JFS_IP(inode);
122 INIT_LIST_HEAD(&inode->i_dentry);
123 kmem_cache_free(jfs_inode_cachep, ji); 122 kmem_cache_free(jfs_inode_cachep, ji);
124} 123}
125 124
@@ -609,9 +608,9 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
609 return 0; 608 return 0;
610} 609}
611 610
612static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 611static int jfs_show_options(struct seq_file *seq, struct dentry *root)
613{ 612{
614 struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); 613 struct jfs_sb_info *sbi = JFS_SBI(root->d_sb);
615 614
616 if (sbi->uid != -1) 615 if (sbi->uid != -1)
617 seq_printf(seq, ",uid=%d", sbi->uid); 616 seq_printf(seq, ",uid=%d", sbi->uid);
diff --git a/fs/libfs.c b/fs/libfs.c
index f6d411eef1e7..5b2dbb3ba4fc 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -12,7 +12,7 @@
12#include <linux/mutex.h> 12#include <linux/mutex.h>
13#include <linux/exportfs.h> 13#include <linux/exportfs.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h> /* sync_mapping_buffers */
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18 18
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 1ca0679c80bf..2240d384d787 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -403,7 +403,7 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
403{ 403{
404 struct super_block *sb = datap; 404 struct super_block *sb = datap;
405 405
406 return sb == file->f_file->f_path.mnt->mnt_sb; 406 return sb == file->f_file->f_path.dentry->d_sb;
407} 407}
408 408
409/** 409/**
diff --git a/fs/locks.c b/fs/locks.c
index 3b0d05dcd7c1..637694bf3a03 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1205,6 +1205,8 @@ int __break_lease(struct inode *inode, unsigned int mode)
1205 int want_write = (mode & O_ACCMODE) != O_RDONLY; 1205 int want_write = (mode & O_ACCMODE) != O_RDONLY;
1206 1206
1207 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); 1207 new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1208 if (IS_ERR(new_fl))
1209 return PTR_ERR(new_fl);
1208 1210
1209 lock_flocks(); 1211 lock_flocks();
1210 1212
@@ -1221,12 +1223,6 @@ int __break_lease(struct inode *inode, unsigned int mode)
1221 if (fl->fl_owner == current->files) 1223 if (fl->fl_owner == current->files)
1222 i_have_this_lease = 1; 1224 i_have_this_lease = 1;
1223 1225
1224 if (IS_ERR(new_fl) && !i_have_this_lease
1225 && ((mode & O_NONBLOCK) == 0)) {
1226 error = PTR_ERR(new_fl);
1227 goto out;
1228 }
1229
1230 break_time = 0; 1226 break_time = 0;
1231 if (lease_break_time > 0) { 1227 if (lease_break_time > 0) {
1232 break_time = jiffies + lease_break_time * HZ; 1228 break_time = jiffies + lease_break_time * HZ;
@@ -1284,8 +1280,7 @@ restart:
1284 1280
1285out: 1281out:
1286 unlock_flocks(); 1282 unlock_flocks();
1287 if (!IS_ERR(new_fl)) 1283 locks_free_lock(new_fl);
1288 locks_free_lock(new_fl);
1289 return error; 1284 return error;
1290} 1285}
1291 1286
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index b7d7f67cee5a..501043e8966c 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -482,7 +482,7 @@ out:
482 return ret; 482 return ret;
483} 483}
484 484
485static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 485static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
486{ 486{
487 struct inode *inode; 487 struct inode *inode;
488 488
@@ -501,7 +501,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
501 return __logfs_create(dir, dentry, inode, NULL, 0); 501 return __logfs_create(dir, dentry, inode, NULL, 0);
502} 502}
503 503
504static int logfs_create(struct inode *dir, struct dentry *dentry, int mode, 504static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
505 struct nameidata *nd) 505 struct nameidata *nd)
506{ 506{
507 struct inode *inode; 507 struct inode *inode;
@@ -517,7 +517,7 @@ static int logfs_create(struct inode *dir, struct dentry *dentry, int mode,
517 return __logfs_create(dir, dentry, inode, NULL, 0); 517 return __logfs_create(dir, dentry, inode, NULL, 0);
518} 518}
519 519
520static int logfs_mknod(struct inode *dir, struct dentry *dentry, int mode, 520static int logfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
521 dev_t rdev) 521 dev_t rdev)
522{ 522{
523 struct inode *inode; 523 struct inode *inode;
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index 7e441ad5f792..388df1aa35e5 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -144,7 +144,6 @@ struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
144static void logfs_i_callback(struct rcu_head *head) 144static void logfs_i_callback(struct rcu_head *head)
145{ 145{
146 struct inode *inode = container_of(head, struct inode, i_rcu); 146 struct inode *inode = container_of(head, struct inode, i_rcu);
147 INIT_LIST_HEAD(&inode->i_dentry);
148 kmem_cache_free(logfs_inode_cache, logfs_inode(inode)); 147 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
149} 148}
150 149
@@ -324,7 +323,7 @@ static void logfs_set_ino_generation(struct super_block *sb,
324 mutex_unlock(&super->s_journal_mutex); 323 mutex_unlock(&super->s_journal_mutex);
325} 324}
326 325
327struct inode *logfs_new_inode(struct inode *dir, int mode) 326struct inode *logfs_new_inode(struct inode *dir, umode_t mode)
328{ 327{
329 struct super_block *sb = dir->i_sb; 328 struct super_block *sb = dir->i_sb;
330 struct inode *inode; 329 struct inode *inode;
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 398ecff6e548..926373866a55 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -520,7 +520,7 @@ extern const struct super_operations logfs_super_operations;
520struct inode *logfs_iget(struct super_block *sb, ino_t ino); 520struct inode *logfs_iget(struct super_block *sb, ino_t ino);
521struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *cookie); 521struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *cookie);
522void logfs_safe_iput(struct inode *inode, int cookie); 522void logfs_safe_iput(struct inode *inode, int cookie);
523struct inode *logfs_new_inode(struct inode *dir, int mode); 523struct inode *logfs_new_inode(struct inode *dir, umode_t mode);
524struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino); 524struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino);
525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino); 525struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino);
526int logfs_init_inode_cache(void); 526int logfs_init_inode_cache(void);
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index ef175cb8cfd8..4bc50dac8e97 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -209,7 +209,7 @@ void minix_free_inode(struct inode * inode)
209 mark_buffer_dirty(bh); 209 mark_buffer_dirty(bh);
210} 210}
211 211
212struct inode *minix_new_inode(const struct inode *dir, int mode, int *error) 212struct inode *minix_new_inode(const struct inode *dir, umode_t mode, int *error)
213{ 213{
214 struct super_block *sb = dir->i_sb; 214 struct super_block *sb = dir->i_sb;
215 struct minix_sb_info *sbi = minix_sb(sb); 215 struct minix_sb_info *sbi = minix_sb(sb);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 1d9e33966db0..fa8b612b8ce2 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -71,7 +71,6 @@ static struct inode *minix_alloc_inode(struct super_block *sb)
71static void minix_i_callback(struct rcu_head *head) 71static void minix_i_callback(struct rcu_head *head)
72{ 72{
73 struct inode *inode = container_of(head, struct inode, i_rcu); 73 struct inode *inode = container_of(head, struct inode, i_rcu);
74 INIT_LIST_HEAD(&inode->i_dentry);
75 kmem_cache_free(minix_inode_cachep, minix_i(inode)); 74 kmem_cache_free(minix_inode_cachep, minix_i(inode));
76} 75}
77 76
@@ -263,23 +262,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
263 goto out_no_root; 262 goto out_no_root;
264 } 263 }
265 264
266 ret = -ENOMEM;
267 s->s_root = d_alloc_root(root_inode);
268 if (!s->s_root)
269 goto out_iput;
270
271 if (!(s->s_flags & MS_RDONLY)) {
272 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
273 ms->s_state &= ~MINIX_VALID_FS;
274 mark_buffer_dirty(bh);
275 }
276 if (!(sbi->s_mount_state & MINIX_VALID_FS))
277 printk("MINIX-fs: mounting unchecked file system, "
278 "running fsck is recommended\n");
279 else if (sbi->s_mount_state & MINIX_ERROR_FS)
280 printk("MINIX-fs: mounting file system with errors, "
281 "running fsck is recommended\n");
282
283 /* Apparently minix can create filesystems that allocate more blocks for 265 /* Apparently minix can create filesystems that allocate more blocks for
284 * the bitmaps than needed. We simply ignore that, but verify it didn't 266 * the bitmaps than needed. We simply ignore that, but verify it didn't
285 * create one with not enough blocks and bail out if so. 267 * create one with not enough blocks and bail out if so.
@@ -300,6 +282,23 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
300 goto out_iput; 282 goto out_iput;
301 } 283 }
302 284
285 ret = -ENOMEM;
286 s->s_root = d_alloc_root(root_inode);
287 if (!s->s_root)
288 goto out_iput;
289
290 if (!(s->s_flags & MS_RDONLY)) {
291 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
292 ms->s_state &= ~MINIX_VALID_FS;
293 mark_buffer_dirty(bh);
294 }
295 if (!(sbi->s_mount_state & MINIX_VALID_FS))
296 printk("MINIX-fs: mounting unchecked file system, "
297 "running fsck is recommended\n");
298 else if (sbi->s_mount_state & MINIX_ERROR_FS)
299 printk("MINIX-fs: mounting file system with errors, "
300 "running fsck is recommended\n");
301
303 return 0; 302 return 0;
304 303
305out_iput: 304out_iput:
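
The minix_fill_super hunks above move the d_alloc_root()/mount-state block, without changing its contents, so that it runs only after the bitmap sanity checks; failures before the root dentry exists then unwind with a plain iput(). An abbreviated sketch of the resulting ordering (examplefs_* names and the elided checks are hypothetical):

#include <linux/fs.h>
#include <linux/dcache.h>

/* Abbreviated sketch of the fill_super ordering after the move above. */
static int examplefs_fill_super(struct super_block *s, void *data, int silent)
{
	struct inode *root_inode = examplefs_iget_root(s);	/* hypothetical */
	int ret = -EINVAL;

	if (IS_ERR(root_inode))
		goto out;

	/* 1. validate on-disk metadata (bitmap sizes etc.) first */
	if (!examplefs_bitmaps_sane(s))				/* hypothetical */
		goto out_iput;

	/* 2. only then commit to a root dentry */
	ret = -ENOMEM;
	s->s_root = d_alloc_root(root_inode);
	if (!s->s_root)
		goto out_iput;
	return 0;

out_iput:
	iput(root_inode);
out:
	return ret;
}
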
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 26bbd55e82ea..c889ef0aa571 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -46,7 +46,7 @@ struct minix_sb_info {
46extern struct inode *minix_iget(struct super_block *, unsigned long); 46extern struct inode *minix_iget(struct super_block *, unsigned long);
47extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **); 47extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, struct buffer_head **);
48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **); 48extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
49extern struct inode * minix_new_inode(const struct inode *, int, int *); 49extern struct inode * minix_new_inode(const struct inode *, umode_t, int *);
50extern void minix_free_inode(struct inode * inode); 50extern void minix_free_inode(struct inode * inode);
51extern unsigned long minix_count_free_inodes(struct super_block *sb); 51extern unsigned long minix_count_free_inodes(struct super_block *sb);
52extern int minix_new_block(struct inode * inode); 52extern int minix_new_block(struct inode * inode);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 6e6777f1b4b2..2f76e38c2065 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -36,7 +36,7 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, st
36 return NULL; 36 return NULL;
37} 37}
38 38
39static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 39static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
40{ 40{
41 int error; 41 int error;
42 struct inode *inode; 42 struct inode *inode;
@@ -54,7 +54,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, dev_
54 return error; 54 return error;
55} 55}
56 56
57static int minix_create(struct inode * dir, struct dentry *dentry, int mode, 57static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
58 struct nameidata *nd) 58 struct nameidata *nd)
59{ 59{
60 return minix_mknod(dir, dentry, mode, 0); 60 return minix_mknod(dir, dentry, mode, 0);
@@ -103,7 +103,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
103 return add_nondir(dentry, inode); 103 return add_nondir(dentry, inode);
104} 104}
105 105
106static int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) 106static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
107{ 107{
108 struct inode * inode; 108 struct inode * inode;
109 int err = -EMLINK; 109 int err = -EMLINK;
diff --git a/fs/mount.h b/fs/mount.h
new file mode 100644
index 000000000000..4ef36d93e5a2
--- /dev/null
+++ b/fs/mount.h
@@ -0,0 +1,76 @@
1#include <linux/mount.h>
2#include <linux/seq_file.h>
3#include <linux/poll.h>
4
5struct mnt_namespace {
6 atomic_t count;
7 struct mount * root;
8 struct list_head list;
9 wait_queue_head_t poll;
10 int event;
11};
12
13struct mnt_pcp {
14 int mnt_count;
15 int mnt_writers;
16};
17
18struct mount {
19 struct list_head mnt_hash;
20 struct mount *mnt_parent;
21 struct dentry *mnt_mountpoint;
22 struct vfsmount mnt;
23#ifdef CONFIG_SMP
24 struct mnt_pcp __percpu *mnt_pcp;
25 atomic_t mnt_longterm; /* how many of the refs are longterm */
26#else
27 int mnt_count;
28 int mnt_writers;
29#endif
30 struct list_head mnt_mounts; /* list of children, anchored here */
31 struct list_head mnt_child; /* and going through their mnt_child */
32 struct list_head mnt_instance; /* mount instance on sb->s_mounts */
33 const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */
34 struct list_head mnt_list;
35 struct list_head mnt_expire; /* link in fs-specific expiry list */
36 struct list_head mnt_share; /* circular list of shared mounts */
37 struct list_head mnt_slave_list;/* list of slave mounts */
38 struct list_head mnt_slave; /* slave list entry */
39 struct mount *mnt_master; /* slave is on master->mnt_slave_list */
40 struct mnt_namespace *mnt_ns; /* containing namespace */
41#ifdef CONFIG_FSNOTIFY
42 struct hlist_head mnt_fsnotify_marks;
43 __u32 mnt_fsnotify_mask;
44#endif
45 int mnt_id; /* mount identifier */
46 int mnt_group_id; /* peer group identifier */
47 int mnt_expiry_mark; /* true if marked for expiry */
48 int mnt_pinned;
49 int mnt_ghosts;
50};
51
52static inline struct mount *real_mount(struct vfsmount *mnt)
53{
54 return container_of(mnt, struct mount, mnt);
55}
56
57static inline int mnt_has_parent(struct mount *mnt)
58{
59 return mnt != mnt->mnt_parent;
60}
61
62extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
63
64static inline void get_mnt_ns(struct mnt_namespace *ns)
65{
66 atomic_inc(&ns->count);
67}
68
69struct proc_mounts {
70 struct seq_file m; /* must be the first element */
71 struct mnt_namespace *ns;
72 struct path root;
73 int (*show)(struct seq_file *, struct vfsmount *);
74};
75
76extern const struct seq_operations mounts_op;
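
The new fs/mount.h above moves the VFS-internal bookkeeping into struct mount, which embeds the public struct vfsmount as its mnt member; code inside fs/ converts a vfsmount pointer back with real_mount() (a container_of()) and exposes the public view again as &mnt->mnt. A minimal illustration, assuming it is built inside fs/ where "mount.h" is visible (print_parent_id is a hypothetical helper):

#include <linux/mount.h>
#include <linux/printk.h>
#include "mount.h"	/* only reachable from inside fs/ */

/* Hypothetical helper showing the vfsmount <-> mount conversion
 * introduced by the header above. */
static void print_parent_id(struct vfsmount *m)
{
	struct mount *mnt = real_mount(m);	/* container_of(m, struct mount, mnt) */

	if (mnt_has_parent(mnt))
		printk(KERN_INFO "parent mount id: %d\n", mnt->mnt_parent->mnt_id);
	/* the public vfsmount view is still &mnt->mnt */
}
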
diff --git a/fs/namei.c b/fs/namei.c
index 5008f01787f5..c283a1ec008e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -36,6 +36,7 @@
36#include <asm/uaccess.h> 36#include <asm/uaccess.h>
37 37
38#include "internal.h" 38#include "internal.h"
39#include "mount.h"
39 40
40/* [Feb-1997 T. Schoebel-Theuer] 41/* [Feb-1997 T. Schoebel-Theuer]
41 * Fundamental changes in the pathname lookup mechanisms (namei) 42 * Fundamental changes in the pathname lookup mechanisms (namei)
@@ -676,36 +677,38 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
676 677
677static int follow_up_rcu(struct path *path) 678static int follow_up_rcu(struct path *path)
678{ 679{
679 struct vfsmount *parent; 680 struct mount *mnt = real_mount(path->mnt);
681 struct mount *parent;
680 struct dentry *mountpoint; 682 struct dentry *mountpoint;
681 683
682 parent = path->mnt->mnt_parent; 684 parent = mnt->mnt_parent;
683 if (parent == path->mnt) 685 if (&parent->mnt == path->mnt)
684 return 0; 686 return 0;
685 mountpoint = path->mnt->mnt_mountpoint; 687 mountpoint = mnt->mnt_mountpoint;
686 path->dentry = mountpoint; 688 path->dentry = mountpoint;
687 path->mnt = parent; 689 path->mnt = &parent->mnt;
688 return 1; 690 return 1;
689} 691}
690 692
691int follow_up(struct path *path) 693int follow_up(struct path *path)
692{ 694{
693 struct vfsmount *parent; 695 struct mount *mnt = real_mount(path->mnt);
696 struct mount *parent;
694 struct dentry *mountpoint; 697 struct dentry *mountpoint;
695 698
696 br_read_lock(vfsmount_lock); 699 br_read_lock(vfsmount_lock);
697 parent = path->mnt->mnt_parent; 700 parent = mnt->mnt_parent;
698 if (parent == path->mnt) { 701 if (&parent->mnt == path->mnt) {
699 br_read_unlock(vfsmount_lock); 702 br_read_unlock(vfsmount_lock);
700 return 0; 703 return 0;
701 } 704 }
702 mntget(parent); 705 mntget(&parent->mnt);
703 mountpoint = dget(path->mnt->mnt_mountpoint); 706 mountpoint = dget(mnt->mnt_mountpoint);
704 br_read_unlock(vfsmount_lock); 707 br_read_unlock(vfsmount_lock);
705 dput(path->dentry); 708 dput(path->dentry);
706 path->dentry = mountpoint; 709 path->dentry = mountpoint;
707 mntput(path->mnt); 710 mntput(path->mnt);
708 path->mnt = parent; 711 path->mnt = &parent->mnt;
709 return 1; 712 return 1;
710} 713}
711 714
@@ -884,7 +887,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
884 struct inode **inode) 887 struct inode **inode)
885{ 888{
886 for (;;) { 889 for (;;) {
887 struct vfsmount *mounted; 890 struct mount *mounted;
888 /* 891 /*
889 * Don't forget we might have a non-mountpoint managed dentry 892 * Don't forget we might have a non-mountpoint managed dentry
890 * that wants to block transit. 893 * that wants to block transit.
@@ -898,8 +901,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
898 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 901 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
899 if (!mounted) 902 if (!mounted)
900 break; 903 break;
901 path->mnt = mounted; 904 path->mnt = &mounted->mnt;
902 path->dentry = mounted->mnt_root; 905 path->dentry = mounted->mnt.mnt_root;
903 nd->flags |= LOOKUP_JUMPED; 906 nd->flags |= LOOKUP_JUMPED;
904 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 907 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
905 /* 908 /*
@@ -915,12 +918,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
915static void follow_mount_rcu(struct nameidata *nd) 918static void follow_mount_rcu(struct nameidata *nd)
916{ 919{
917 while (d_mountpoint(nd->path.dentry)) { 920 while (d_mountpoint(nd->path.dentry)) {
918 struct vfsmount *mounted; 921 struct mount *mounted;
919 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); 922 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
920 if (!mounted) 923 if (!mounted)
921 break; 924 break;
922 nd->path.mnt = mounted; 925 nd->path.mnt = &mounted->mnt;
923 nd->path.dentry = mounted->mnt_root; 926 nd->path.dentry = mounted->mnt.mnt_root;
924 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); 927 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
925 } 928 }
926} 929}
@@ -1976,7 +1979,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
1976 } 1979 }
1977} 1980}
1978 1981
1979int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1982int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1980 struct nameidata *nd) 1983 struct nameidata *nd)
1981{ 1984{
1982 int error = may_create(dir, dentry); 1985 int error = may_create(dir, dentry);
@@ -2177,7 +2180,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2177 2180
2178 /* Negative dentry, just create the file */ 2181 /* Negative dentry, just create the file */
2179 if (!dentry->d_inode) { 2182 if (!dentry->d_inode) {
2180 int mode = op->mode; 2183 umode_t mode = op->mode;
2181 if (!IS_POSIXACL(dir->d_inode)) 2184 if (!IS_POSIXACL(dir->d_inode))
2182 mode &= ~current_umask(); 2185 mode &= ~current_umask();
2183 /* 2186 /*
@@ -2444,7 +2447,7 @@ struct dentry *user_path_create(int dfd, const char __user *pathname, struct pat
2444} 2447}
2445EXPORT_SYMBOL(user_path_create); 2448EXPORT_SYMBOL(user_path_create);
2446 2449
2447int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 2450int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
2448{ 2451{
2449 int error = may_create(dir, dentry); 2452 int error = may_create(dir, dentry);
2450 2453
@@ -2472,7 +2475,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2472 return error; 2475 return error;
2473} 2476}
2474 2477
2475static int may_mknod(mode_t mode) 2478static int may_mknod(umode_t mode)
2476{ 2479{
2477 switch (mode & S_IFMT) { 2480 switch (mode & S_IFMT) {
2478 case S_IFREG: 2481 case S_IFREG:
@@ -2489,7 +2492,7 @@ static int may_mknod(mode_t mode)
2489 } 2492 }
2490} 2493}
2491 2494
2492SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, 2495SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
2493 unsigned, dev) 2496 unsigned, dev)
2494{ 2497{
2495 struct dentry *dentry; 2498 struct dentry *dentry;
@@ -2536,12 +2539,12 @@ out_dput:
2536 return error; 2539 return error;
2537} 2540}
2538 2541
2539SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev) 2542SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
2540{ 2543{
2541 return sys_mknodat(AT_FDCWD, filename, mode, dev); 2544 return sys_mknodat(AT_FDCWD, filename, mode, dev);
2542} 2545}
2543 2546
2544int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2547int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2545{ 2548{
2546 int error = may_create(dir, dentry); 2549 int error = may_create(dir, dentry);
2547 2550
@@ -2562,7 +2565,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2562 return error; 2565 return error;
2563} 2566}
2564 2567
2565SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) 2568SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
2566{ 2569{
2567 struct dentry *dentry; 2570 struct dentry *dentry;
2568 struct path path; 2571 struct path path;
@@ -2590,7 +2593,7 @@ out_dput:
2590 return error; 2593 return error;
2591} 2594}
2592 2595
2593SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) 2596SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
2594{ 2597{
2595 return sys_mkdirat(AT_FDCWD, pathname, mode); 2598 return sys_mkdirat(AT_FDCWD, pathname, mode);
2596} 2599}
diff --git a/fs/namespace.c b/fs/namespace.c
index cfc6d4448aa5..e6081996c9a2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -9,30 +9,17 @@
9 */ 9 */
10 10
11#include <linux/syscalls.h> 11#include <linux/syscalls.h>
12#include <linux/slab.h> 12#include <linux/export.h>
13#include <linux/sched.h>
14#include <linux/spinlock.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/kernel.h>
18#include <linux/acct.h>
19#include <linux/capability.h> 13#include <linux/capability.h>
20#include <linux/cpumask.h>
21#include <linux/module.h>
22#include <linux/sysfs.h>
23#include <linux/seq_file.h>
24#include <linux/mnt_namespace.h> 14#include <linux/mnt_namespace.h>
25#include <linux/namei.h> 15#include <linux/namei.h>
26#include <linux/nsproxy.h>
27#include <linux/security.h> 16#include <linux/security.h>
28#include <linux/mount.h>
29#include <linux/ramfs.h>
30#include <linux/log2.h>
31#include <linux/idr.h> 17#include <linux/idr.h>
32#include <linux/fs_struct.h> 18#include <linux/acct.h> /* acct_auto_close_mnt */
33#include <linux/fsnotify.h> 19#include <linux/ramfs.h> /* init_rootfs */
34#include <asm/uaccess.h> 20#include <linux/fs_struct.h> /* get_fs_root et.al. */
35#include <asm/unistd.h> 21#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
22#include <linux/uaccess.h>
36#include "pnode.h" 23#include "pnode.h"
37#include "internal.h" 24#include "internal.h"
38 25
@@ -78,7 +65,7 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
78 * allocation is serialized by namespace_sem, but we need the spinlock to 65 * allocation is serialized by namespace_sem, but we need the spinlock to
79 * serialize with freeing. 66 * serialize with freeing.
80 */ 67 */
81static int mnt_alloc_id(struct vfsmount *mnt) 68static int mnt_alloc_id(struct mount *mnt)
82{ 69{
83 int res; 70 int res;
84 71
@@ -95,7 +82,7 @@ retry:
95 return res; 82 return res;
96} 83}
97 84
98static void mnt_free_id(struct vfsmount *mnt) 85static void mnt_free_id(struct mount *mnt)
99{ 86{
100 int id = mnt->mnt_id; 87 int id = mnt->mnt_id;
101 spin_lock(&mnt_id_lock); 88 spin_lock(&mnt_id_lock);
@@ -110,7 +97,7 @@ static void mnt_free_id(struct vfsmount *mnt)
110 * 97 *
111 * mnt_group_ida is protected by namespace_sem 98 * mnt_group_ida is protected by namespace_sem
112 */ 99 */
113static int mnt_alloc_group_id(struct vfsmount *mnt) 100static int mnt_alloc_group_id(struct mount *mnt)
114{ 101{
115 int res; 102 int res;
116 103
@@ -129,7 +116,7 @@ static int mnt_alloc_group_id(struct vfsmount *mnt)
129/* 116/*
130 * Release a peer group ID 117 * Release a peer group ID
131 */ 118 */
132void mnt_release_group_id(struct vfsmount *mnt) 119void mnt_release_group_id(struct mount *mnt)
133{ 120{
134 int id = mnt->mnt_group_id; 121 int id = mnt->mnt_group_id;
135 ida_remove(&mnt_group_ida, id); 122 ida_remove(&mnt_group_ida, id);
@@ -141,7 +128,7 @@ void mnt_release_group_id(struct vfsmount *mnt)
141/* 128/*
142 * vfsmount lock must be held for read 129 * vfsmount lock must be held for read
143 */ 130 */
144static inline void mnt_add_count(struct vfsmount *mnt, int n) 131static inline void mnt_add_count(struct mount *mnt, int n)
145{ 132{
146#ifdef CONFIG_SMP 133#ifdef CONFIG_SMP
147 this_cpu_add(mnt->mnt_pcp->mnt_count, n); 134 this_cpu_add(mnt->mnt_pcp->mnt_count, n);
@@ -152,35 +139,10 @@ static inline void mnt_add_count(struct vfsmount *mnt, int n)
152#endif 139#endif
153} 140}
154 141
155static inline void mnt_set_count(struct vfsmount *mnt, int n)
156{
157#ifdef CONFIG_SMP
158 this_cpu_write(mnt->mnt_pcp->mnt_count, n);
159#else
160 mnt->mnt_count = n;
161#endif
162}
163
164/*
165 * vfsmount lock must be held for read
166 */
167static inline void mnt_inc_count(struct vfsmount *mnt)
168{
169 mnt_add_count(mnt, 1);
170}
171
172/*
173 * vfsmount lock must be held for read
174 */
175static inline void mnt_dec_count(struct vfsmount *mnt)
176{
177 mnt_add_count(mnt, -1);
178}
179
180/* 142/*
181 * vfsmount lock must be held for write 143 * vfsmount lock must be held for write
182 */ 144 */
183unsigned int mnt_get_count(struct vfsmount *mnt) 145unsigned int mnt_get_count(struct mount *mnt)
184{ 146{
185#ifdef CONFIG_SMP 147#ifdef CONFIG_SMP
186 unsigned int count = 0; 148 unsigned int count = 0;
@@ -196,9 +158,9 @@ unsigned int mnt_get_count(struct vfsmount *mnt)
196#endif 158#endif
197} 159}
198 160
199static struct vfsmount *alloc_vfsmnt(const char *name) 161static struct mount *alloc_vfsmnt(const char *name)
200{ 162{
201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 163 struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
202 if (mnt) { 164 if (mnt) {
203 int err; 165 int err;
204 166
@@ -277,7 +239,7 @@ int __mnt_is_readonly(struct vfsmount *mnt)
277} 239}
278EXPORT_SYMBOL_GPL(__mnt_is_readonly); 240EXPORT_SYMBOL_GPL(__mnt_is_readonly);
279 241
280static inline void mnt_inc_writers(struct vfsmount *mnt) 242static inline void mnt_inc_writers(struct mount *mnt)
281{ 243{
282#ifdef CONFIG_SMP 244#ifdef CONFIG_SMP
283 this_cpu_inc(mnt->mnt_pcp->mnt_writers); 245 this_cpu_inc(mnt->mnt_pcp->mnt_writers);
@@ -286,7 +248,7 @@ static inline void mnt_inc_writers(struct vfsmount *mnt)
286#endif 248#endif
287} 249}
288 250
289static inline void mnt_dec_writers(struct vfsmount *mnt) 251static inline void mnt_dec_writers(struct mount *mnt)
290{ 252{
291#ifdef CONFIG_SMP 253#ifdef CONFIG_SMP
292 this_cpu_dec(mnt->mnt_pcp->mnt_writers); 254 this_cpu_dec(mnt->mnt_pcp->mnt_writers);
@@ -295,7 +257,7 @@ static inline void mnt_dec_writers(struct vfsmount *mnt)
295#endif 257#endif
296} 258}
297 259
298static unsigned int mnt_get_writers(struct vfsmount *mnt) 260static unsigned int mnt_get_writers(struct mount *mnt)
299{ 261{
300#ifdef CONFIG_SMP 262#ifdef CONFIG_SMP
301 unsigned int count = 0; 263 unsigned int count = 0;
@@ -311,6 +273,15 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
311#endif 273#endif
312} 274}
313 275
276static int mnt_is_readonly(struct vfsmount *mnt)
277{
278 if (mnt->mnt_sb->s_readonly_remount)
279 return 1;
280 /* Order wrt setting s_flags/s_readonly_remount in do_remount() */
281 smp_rmb();
282 return __mnt_is_readonly(mnt);
283}
284
314/* 285/*
315 * Most r/o checks on a fs are for operations that take 286 * Most r/o checks on a fs are for operations that take
316 * discrete amounts of time, like a write() or unlink(). 287 * discrete amounts of time, like a write() or unlink().
@@ -321,7 +292,7 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
321 */ 292 */
322/** 293/**
323 * mnt_want_write - get write access to a mount 294 * mnt_want_write - get write access to a mount
324 * @mnt: the mount on which to take a write 295 * @m: the mount on which to take a write
325 * 296 *
326 * This tells the low-level filesystem that a write is 297 * This tells the low-level filesystem that a write is
327 * about to be performed to it, and makes sure that 298 * about to be performed to it, and makes sure that
@@ -329,8 +300,9 @@ static unsigned int mnt_get_writers(struct vfsmount *mnt)
329 * the write operation is finished, mnt_drop_write() 300 * the write operation is finished, mnt_drop_write()
330 * must be called. This is effectively a refcount. 301 * must be called. This is effectively a refcount.
331 */ 302 */
332int mnt_want_write(struct vfsmount *mnt) 303int mnt_want_write(struct vfsmount *m)
333{ 304{
305 struct mount *mnt = real_mount(m);
334 int ret = 0; 306 int ret = 0;
335 307
336 preempt_disable(); 308 preempt_disable();
@@ -341,7 +313,7 @@ int mnt_want_write(struct vfsmount *mnt)
341 * incremented count after it has set MNT_WRITE_HOLD. 313 * incremented count after it has set MNT_WRITE_HOLD.
342 */ 314 */
343 smp_mb(); 315 smp_mb();
344 while (mnt->mnt_flags & MNT_WRITE_HOLD) 316 while (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
345 cpu_relax(); 317 cpu_relax();
346 /* 318 /*
347 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will 319 * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
@@ -349,12 +321,10 @@ int mnt_want_write(struct vfsmount *mnt)
349 * MNT_WRITE_HOLD is cleared. 321 * MNT_WRITE_HOLD is cleared.
350 */ 322 */
351 smp_rmb(); 323 smp_rmb();
352 if (__mnt_is_readonly(mnt)) { 324 if (mnt_is_readonly(m)) {
353 mnt_dec_writers(mnt); 325 mnt_dec_writers(mnt);
354 ret = -EROFS; 326 ret = -EROFS;
355 goto out;
356 } 327 }
357out:
358 preempt_enable(); 328 preempt_enable();
359 return ret; 329 return ret;
360} 330}
@@ -378,7 +348,7 @@ int mnt_clone_write(struct vfsmount *mnt)
378 if (__mnt_is_readonly(mnt)) 348 if (__mnt_is_readonly(mnt))
379 return -EROFS; 349 return -EROFS;
380 preempt_disable(); 350 preempt_disable();
381 mnt_inc_writers(mnt); 351 mnt_inc_writers(real_mount(mnt));
382 preempt_enable(); 352 preempt_enable();
383 return 0; 353 return 0;
384} 354}
@@ -412,17 +382,23 @@ EXPORT_SYMBOL_GPL(mnt_want_write_file);
412void mnt_drop_write(struct vfsmount *mnt) 382void mnt_drop_write(struct vfsmount *mnt)
413{ 383{
414 preempt_disable(); 384 preempt_disable();
415 mnt_dec_writers(mnt); 385 mnt_dec_writers(real_mount(mnt));
416 preempt_enable(); 386 preempt_enable();
417} 387}
418EXPORT_SYMBOL_GPL(mnt_drop_write); 388EXPORT_SYMBOL_GPL(mnt_drop_write);
419 389
420static int mnt_make_readonly(struct vfsmount *mnt) 390void mnt_drop_write_file(struct file *file)
391{
392 mnt_drop_write(file->f_path.mnt);
393}
394EXPORT_SYMBOL(mnt_drop_write_file);
395
396static int mnt_make_readonly(struct mount *mnt)
421{ 397{
422 int ret = 0; 398 int ret = 0;
423 399
424 br_write_lock(vfsmount_lock); 400 br_write_lock(vfsmount_lock);
425 mnt->mnt_flags |= MNT_WRITE_HOLD; 401 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
426 /* 402 /*
427 * After storing MNT_WRITE_HOLD, we'll read the counters. This store 403 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
428 * should be visible before we do. 404 * should be visible before we do.
@@ -448,25 +424,61 @@ static int mnt_make_readonly(struct vfsmount *mnt)
448 if (mnt_get_writers(mnt) > 0) 424 if (mnt_get_writers(mnt) > 0)
449 ret = -EBUSY; 425 ret = -EBUSY;
450 else 426 else
451 mnt->mnt_flags |= MNT_READONLY; 427 mnt->mnt.mnt_flags |= MNT_READONLY;
452 /* 428 /*
453 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers 429 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
454 * that become unheld will see MNT_READONLY. 430 * that become unheld will see MNT_READONLY.
455 */ 431 */
456 smp_wmb(); 432 smp_wmb();
457 mnt->mnt_flags &= ~MNT_WRITE_HOLD; 433 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
458 br_write_unlock(vfsmount_lock); 434 br_write_unlock(vfsmount_lock);
459 return ret; 435 return ret;
460} 436}
461 437
462static void __mnt_unmake_readonly(struct vfsmount *mnt) 438static void __mnt_unmake_readonly(struct mount *mnt)
463{ 439{
464 br_write_lock(vfsmount_lock); 440 br_write_lock(vfsmount_lock);
465 mnt->mnt_flags &= ~MNT_READONLY; 441 mnt->mnt.mnt_flags &= ~MNT_READONLY;
442 br_write_unlock(vfsmount_lock);
443}
444
445int sb_prepare_remount_readonly(struct super_block *sb)
446{
447 struct mount *mnt;
448 int err = 0;
449
450 /* Racy optimization. Recheck the counter under MNT_WRITE_HOLD */
451 if (atomic_long_read(&sb->s_remove_count))
452 return -EBUSY;
453
454 br_write_lock(vfsmount_lock);
455 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
456 if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
457 mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
458 smp_mb();
459 if (mnt_get_writers(mnt) > 0) {
460 err = -EBUSY;
461 break;
462 }
463 }
464 }
465 if (!err && atomic_long_read(&sb->s_remove_count))
466 err = -EBUSY;
467
468 if (!err) {
469 sb->s_readonly_remount = 1;
470 smp_wmb();
471 }
472 list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
473 if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
474 mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
475 }
466 br_write_unlock(vfsmount_lock); 476 br_write_unlock(vfsmount_lock);
477
478 return err;
467} 479}
468 480
469static void free_vfsmnt(struct vfsmount *mnt) 481static void free_vfsmnt(struct mount *mnt)
470{ 482{
471 kfree(mnt->mnt_devname); 483 kfree(mnt->mnt_devname);
472 mnt_free_id(mnt); 484 mnt_free_id(mnt);
@@ -481,20 +493,20 @@ static void free_vfsmnt(struct vfsmount *mnt)
481 * @dir. If @dir is set return the first mount else return the last mount. 493 * @dir. If @dir is set return the first mount else return the last mount.
482 * vfsmount_lock must be held for read or write. 494 * vfsmount_lock must be held for read or write.
483 */ 495 */
484struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, 496struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
485 int dir) 497 int dir)
486{ 498{
487 struct list_head *head = mount_hashtable + hash(mnt, dentry); 499 struct list_head *head = mount_hashtable + hash(mnt, dentry);
488 struct list_head *tmp = head; 500 struct list_head *tmp = head;
489 struct vfsmount *p, *found = NULL; 501 struct mount *p, *found = NULL;
490 502
491 for (;;) { 503 for (;;) {
492 tmp = dir ? tmp->next : tmp->prev; 504 tmp = dir ? tmp->next : tmp->prev;
493 p = NULL; 505 p = NULL;
494 if (tmp == head) 506 if (tmp == head)
495 break; 507 break;
496 p = list_entry(tmp, struct vfsmount, mnt_hash); 508 p = list_entry(tmp, struct mount, mnt_hash);
497 if (p->mnt_parent == mnt && p->mnt_mountpoint == dentry) { 509 if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) {
498 found = p; 510 found = p;
499 break; 511 break;
500 } 512 }
@@ -508,16 +520,21 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
508 */ 520 */
509struct vfsmount *lookup_mnt(struct path *path) 521struct vfsmount *lookup_mnt(struct path *path)
510{ 522{
511 struct vfsmount *child_mnt; 523 struct mount *child_mnt;
512 524
513 br_read_lock(vfsmount_lock); 525 br_read_lock(vfsmount_lock);
514 if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) 526 child_mnt = __lookup_mnt(path->mnt, path->dentry, 1);
515 mntget(child_mnt); 527 if (child_mnt) {
516 br_read_unlock(vfsmount_lock); 528 mnt_add_count(child_mnt, 1);
517 return child_mnt; 529 br_read_unlock(vfsmount_lock);
530 return &child_mnt->mnt;
531 } else {
532 br_read_unlock(vfsmount_lock);
533 return NULL;
534 }
518} 535}
519 536
520static inline int check_mnt(struct vfsmount *mnt) 537static inline int check_mnt(struct mount *mnt)
521{ 538{
522 return mnt->mnt_ns == current->nsproxy->mnt_ns; 539 return mnt->mnt_ns == current->nsproxy->mnt_ns;
523} 540}
@@ -548,12 +565,12 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
548 * Clear dentry's mounted state if it has no remaining mounts. 565 * Clear dentry's mounted state if it has no remaining mounts.
549 * vfsmount_lock must be held for write. 566 * vfsmount_lock must be held for write.
550 */ 567 */
551static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry) 568static void dentry_reset_mounted(struct dentry *dentry)
552{ 569{
553 unsigned u; 570 unsigned u;
554 571
555 for (u = 0; u < HASH_SIZE; u++) { 572 for (u = 0; u < HASH_SIZE; u++) {
556 struct vfsmount *p; 573 struct mount *p;
557 574
558 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { 575 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
559 if (p->mnt_mountpoint == dentry) 576 if (p->mnt_mountpoint == dentry)
@@ -568,25 +585,26 @@ static void dentry_reset_mounted(struct vfsmount *mnt, struct dentry *dentry)
568/* 585/*
569 * vfsmount lock must be held for write 586 * vfsmount lock must be held for write
570 */ 587 */
571static void detach_mnt(struct vfsmount *mnt, struct path *old_path) 588static void detach_mnt(struct mount *mnt, struct path *old_path)
572{ 589{
573 old_path->dentry = mnt->mnt_mountpoint; 590 old_path->dentry = mnt->mnt_mountpoint;
574 old_path->mnt = mnt->mnt_parent; 591 old_path->mnt = &mnt->mnt_parent->mnt;
575 mnt->mnt_parent = mnt; 592 mnt->mnt_parent = mnt;
576 mnt->mnt_mountpoint = mnt->mnt_root; 593 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
577 list_del_init(&mnt->mnt_child); 594 list_del_init(&mnt->mnt_child);
578 list_del_init(&mnt->mnt_hash); 595 list_del_init(&mnt->mnt_hash);
579 dentry_reset_mounted(old_path->mnt, old_path->dentry); 596 dentry_reset_mounted(old_path->dentry);
580} 597}
581 598
582/* 599/*
583 * vfsmount lock must be held for write 600 * vfsmount lock must be held for write
584 */ 601 */
585void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, 602void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry,
586 struct vfsmount *child_mnt) 603 struct mount *child_mnt)
587{ 604{
588 child_mnt->mnt_parent = mntget(mnt); 605 mnt_add_count(mnt, 1); /* essentially, that's mntget */
589 child_mnt->mnt_mountpoint = dget(dentry); 606 child_mnt->mnt_mountpoint = dget(dentry);
607 child_mnt->mnt_parent = mnt;
590 spin_lock(&dentry->d_lock); 608 spin_lock(&dentry->d_lock);
591 dentry->d_flags |= DCACHE_MOUNTED; 609 dentry->d_flags |= DCACHE_MOUNTED;
592 spin_unlock(&dentry->d_lock); 610 spin_unlock(&dentry->d_lock);
@@ -595,15 +613,15 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
595/* 613/*
596 * vfsmount lock must be held for write 614 * vfsmount lock must be held for write
597 */ 615 */
598static void attach_mnt(struct vfsmount *mnt, struct path *path) 616static void attach_mnt(struct mount *mnt, struct path *path)
599{ 617{
600 mnt_set_mountpoint(path->mnt, path->dentry, mnt); 618 mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt);
601 list_add_tail(&mnt->mnt_hash, mount_hashtable + 619 list_add_tail(&mnt->mnt_hash, mount_hashtable +
602 hash(path->mnt, path->dentry)); 620 hash(path->mnt, path->dentry));
603 list_add_tail(&mnt->mnt_child, &path->mnt->mnt_mounts); 621 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts);
604} 622}
605 623
606static inline void __mnt_make_longterm(struct vfsmount *mnt) 624static inline void __mnt_make_longterm(struct mount *mnt)
607{ 625{
608#ifdef CONFIG_SMP 626#ifdef CONFIG_SMP
609 atomic_inc(&mnt->mnt_longterm); 627 atomic_inc(&mnt->mnt_longterm);
@@ -611,7 +629,7 @@ static inline void __mnt_make_longterm(struct vfsmount *mnt)
611} 629}
612 630
613/* needs vfsmount lock for write */ 631/* needs vfsmount lock for write */
614static inline void __mnt_make_shortterm(struct vfsmount *mnt) 632static inline void __mnt_make_shortterm(struct mount *mnt)
615{ 633{
616#ifdef CONFIG_SMP 634#ifdef CONFIG_SMP
617 atomic_dec(&mnt->mnt_longterm); 635 atomic_dec(&mnt->mnt_longterm);
@@ -621,10 +639,10 @@ static inline void __mnt_make_shortterm(struct vfsmount *mnt)
621/* 639/*
622 * vfsmount lock must be held for write 640 * vfsmount lock must be held for write
623 */ 641 */
624static void commit_tree(struct vfsmount *mnt) 642static void commit_tree(struct mount *mnt)
625{ 643{
626 struct vfsmount *parent = mnt->mnt_parent; 644 struct mount *parent = mnt->mnt_parent;
627 struct vfsmount *m; 645 struct mount *m;
628 LIST_HEAD(head); 646 LIST_HEAD(head);
629 struct mnt_namespace *n = parent->mnt_ns; 647 struct mnt_namespace *n = parent->mnt_ns;
630 648
@@ -639,12 +657,12 @@ static void commit_tree(struct vfsmount *mnt)
639 list_splice(&head, n->list.prev); 657 list_splice(&head, n->list.prev);
640 658
641 list_add_tail(&mnt->mnt_hash, mount_hashtable + 659 list_add_tail(&mnt->mnt_hash, mount_hashtable +
642 hash(parent, mnt->mnt_mountpoint)); 660 hash(&parent->mnt, mnt->mnt_mountpoint));
643 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); 661 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
644 touch_mnt_namespace(n); 662 touch_mnt_namespace(n);
645} 663}
646 664
647static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root) 665static struct mount *next_mnt(struct mount *p, struct mount *root)
648{ 666{
649 struct list_head *next = p->mnt_mounts.next; 667 struct list_head *next = p->mnt_mounts.next;
650 if (next == &p->mnt_mounts) { 668 if (next == &p->mnt_mounts) {
@@ -657,14 +675,14 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
657 p = p->mnt_parent; 675 p = p->mnt_parent;
658 } 676 }
659 } 677 }
660 return list_entry(next, struct vfsmount, mnt_child); 678 return list_entry(next, struct mount, mnt_child);
661} 679}
662 680
663static struct vfsmount *skip_mnt_tree(struct vfsmount *p) 681static struct mount *skip_mnt_tree(struct mount *p)
664{ 682{
665 struct list_head *prev = p->mnt_mounts.prev; 683 struct list_head *prev = p->mnt_mounts.prev;
666 while (prev != &p->mnt_mounts) { 684 while (prev != &p->mnt_mounts) {
667 p = list_entry(prev, struct vfsmount, mnt_child); 685 p = list_entry(prev, struct mount, mnt_child);
668 prev = p->mnt_mounts.prev; 686 prev = p->mnt_mounts.prev;
669 } 687 }
670 return p; 688 return p;
@@ -673,7 +691,7 @@ static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
673struct vfsmount * 691struct vfsmount *
674vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) 692vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
675{ 693{
676 struct vfsmount *mnt; 694 struct mount *mnt;
677 struct dentry *root; 695 struct dentry *root;
678 696
679 if (!type) 697 if (!type)
@@ -684,7 +702,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
684 return ERR_PTR(-ENOMEM); 702 return ERR_PTR(-ENOMEM);
685 703
686 if (flags & MS_KERNMOUNT) 704 if (flags & MS_KERNMOUNT)
687 mnt->mnt_flags = MNT_INTERNAL; 705 mnt->mnt.mnt_flags = MNT_INTERNAL;
688 706
689 root = mount_fs(type, flags, name, data); 707 root = mount_fs(type, flags, name, data);
690 if (IS_ERR(root)) { 708 if (IS_ERR(root)) {
@@ -692,19 +710,22 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
692 return ERR_CAST(root); 710 return ERR_CAST(root);
693 } 711 }
694 712
695 mnt->mnt_root = root; 713 mnt->mnt.mnt_root = root;
696 mnt->mnt_sb = root->d_sb; 714 mnt->mnt.mnt_sb = root->d_sb;
697 mnt->mnt_mountpoint = mnt->mnt_root; 715 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
698 mnt->mnt_parent = mnt; 716 mnt->mnt_parent = mnt;
699 return mnt; 717 br_write_lock(vfsmount_lock);
718 list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
719 br_write_unlock(vfsmount_lock);
720 return &mnt->mnt;
700} 721}
701EXPORT_SYMBOL_GPL(vfs_kern_mount); 722EXPORT_SYMBOL_GPL(vfs_kern_mount);
702 723
703static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, 724static struct mount *clone_mnt(struct mount *old, struct dentry *root,
704 int flag) 725 int flag)
705{ 726{
706 struct super_block *sb = old->mnt_sb; 727 struct super_block *sb = old->mnt.mnt_sb;
707 struct vfsmount *mnt = alloc_vfsmnt(old->mnt_devname); 728 struct mount *mnt = alloc_vfsmnt(old->mnt_devname);
708 729
709 if (mnt) { 730 if (mnt) {
710 if (flag & (CL_SLAVE | CL_PRIVATE)) 731 if (flag & (CL_SLAVE | CL_PRIVATE))
@@ -718,12 +739,15 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
718 goto out_free; 739 goto out_free;
719 } 740 }
720 741
721 mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD; 742 mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
722 atomic_inc(&sb->s_active); 743 atomic_inc(&sb->s_active);
723 mnt->mnt_sb = sb; 744 mnt->mnt.mnt_sb = sb;
724 mnt->mnt_root = dget(root); 745 mnt->mnt.mnt_root = dget(root);
725 mnt->mnt_mountpoint = mnt->mnt_root; 746 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
726 mnt->mnt_parent = mnt; 747 mnt->mnt_parent = mnt;
748 br_write_lock(vfsmount_lock);
749 list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
750 br_write_unlock(vfsmount_lock);
727 751
728 if (flag & CL_SLAVE) { 752 if (flag & CL_SLAVE) {
729 list_add(&mnt->mnt_slave, &old->mnt_slave_list); 753 list_add(&mnt->mnt_slave, &old->mnt_slave_list);
@@ -753,9 +777,10 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
753 return NULL; 777 return NULL;
754} 778}
755 779
756static inline void mntfree(struct vfsmount *mnt) 780static inline void mntfree(struct mount *mnt)
757{ 781{
758 struct super_block *sb = mnt->mnt_sb; 782 struct vfsmount *m = &mnt->mnt;
783 struct super_block *sb = m->mnt_sb;
759 784
760 /* 785 /*
761 * This probably indicates that somebody messed 786 * This probably indicates that somebody messed
@@ -768,32 +793,32 @@ static inline void mntfree(struct vfsmount *mnt)
768 * so mnt_get_writers() below is safe. 793 * so mnt_get_writers() below is safe.
769 */ 794 */
770 WARN_ON(mnt_get_writers(mnt)); 795 WARN_ON(mnt_get_writers(mnt));
771 fsnotify_vfsmount_delete(mnt); 796 fsnotify_vfsmount_delete(m);
772 dput(mnt->mnt_root); 797 dput(m->mnt_root);
773 free_vfsmnt(mnt); 798 free_vfsmnt(mnt);
774 deactivate_super(sb); 799 deactivate_super(sb);
775} 800}
776 801
777static void mntput_no_expire(struct vfsmount *mnt) 802static void mntput_no_expire(struct mount *mnt)
778{ 803{
779put_again: 804put_again:
780#ifdef CONFIG_SMP 805#ifdef CONFIG_SMP
781 br_read_lock(vfsmount_lock); 806 br_read_lock(vfsmount_lock);
782 if (likely(atomic_read(&mnt->mnt_longterm))) { 807 if (likely(atomic_read(&mnt->mnt_longterm))) {
783 mnt_dec_count(mnt); 808 mnt_add_count(mnt, -1);
784 br_read_unlock(vfsmount_lock); 809 br_read_unlock(vfsmount_lock);
785 return; 810 return;
786 } 811 }
787 br_read_unlock(vfsmount_lock); 812 br_read_unlock(vfsmount_lock);
788 813
789 br_write_lock(vfsmount_lock); 814 br_write_lock(vfsmount_lock);
790 mnt_dec_count(mnt); 815 mnt_add_count(mnt, -1);
791 if (mnt_get_count(mnt)) { 816 if (mnt_get_count(mnt)) {
792 br_write_unlock(vfsmount_lock); 817 br_write_unlock(vfsmount_lock);
793 return; 818 return;
794 } 819 }
795#else 820#else
796 mnt_dec_count(mnt); 821 mnt_add_count(mnt, -1);
797 if (likely(mnt_get_count(mnt))) 822 if (likely(mnt_get_count(mnt)))
798 return; 823 return;
799 br_write_lock(vfsmount_lock); 824 br_write_lock(vfsmount_lock);
@@ -802,9 +827,10 @@ put_again:
802 mnt_add_count(mnt, mnt->mnt_pinned + 1); 827 mnt_add_count(mnt, mnt->mnt_pinned + 1);
803 mnt->mnt_pinned = 0; 828 mnt->mnt_pinned = 0;
804 br_write_unlock(vfsmount_lock); 829 br_write_unlock(vfsmount_lock);
805 acct_auto_close_mnt(mnt); 830 acct_auto_close_mnt(&mnt->mnt);
806 goto put_again; 831 goto put_again;
807 } 832 }
833 list_del(&mnt->mnt_instance);
808 br_write_unlock(vfsmount_lock); 834 br_write_unlock(vfsmount_lock);
809 mntfree(mnt); 835 mntfree(mnt);
810} 836}
@@ -812,10 +838,11 @@ put_again:
812void mntput(struct vfsmount *mnt) 838void mntput(struct vfsmount *mnt)
813{ 839{
814 if (mnt) { 840 if (mnt) {
841 struct mount *m = real_mount(mnt);
815 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ 842 /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
816 if (unlikely(mnt->mnt_expiry_mark)) 843 if (unlikely(m->mnt_expiry_mark))
817 mnt->mnt_expiry_mark = 0; 844 m->mnt_expiry_mark = 0;
818 mntput_no_expire(mnt); 845 mntput_no_expire(m);
819 } 846 }
820} 847}
821EXPORT_SYMBOL(mntput); 848EXPORT_SYMBOL(mntput);
@@ -823,7 +850,7 @@ EXPORT_SYMBOL(mntput);
823struct vfsmount *mntget(struct vfsmount *mnt) 850struct vfsmount *mntget(struct vfsmount *mnt)
824{ 851{
825 if (mnt) 852 if (mnt)
826 mnt_inc_count(mnt); 853 mnt_add_count(real_mount(mnt), 1);
827 return mnt; 854 return mnt;
828} 855}
829EXPORT_SYMBOL(mntget); 856EXPORT_SYMBOL(mntget);
@@ -831,16 +858,17 @@ EXPORT_SYMBOL(mntget);
831void mnt_pin(struct vfsmount *mnt) 858void mnt_pin(struct vfsmount *mnt)
832{ 859{
833 br_write_lock(vfsmount_lock); 860 br_write_lock(vfsmount_lock);
834 mnt->mnt_pinned++; 861 real_mount(mnt)->mnt_pinned++;
835 br_write_unlock(vfsmount_lock); 862 br_write_unlock(vfsmount_lock);
836} 863}
837EXPORT_SYMBOL(mnt_pin); 864EXPORT_SYMBOL(mnt_pin);
838 865
839void mnt_unpin(struct vfsmount *mnt) 866void mnt_unpin(struct vfsmount *m)
840{ 867{
868 struct mount *mnt = real_mount(m);
841 br_write_lock(vfsmount_lock); 869 br_write_lock(vfsmount_lock);
842 if (mnt->mnt_pinned) { 870 if (mnt->mnt_pinned) {
843 mnt_inc_count(mnt); 871 mnt_add_count(mnt, 1);
844 mnt->mnt_pinned--; 872 mnt->mnt_pinned--;
845 } 873 }
846 br_write_unlock(vfsmount_lock); 874 br_write_unlock(vfsmount_lock);
@@ -858,12 +886,12 @@ static inline void mangle(struct seq_file *m, const char *s)
858 * 886 *
859 * See also save_mount_options(). 887 * See also save_mount_options().
860 */ 888 */
861int generic_show_options(struct seq_file *m, struct vfsmount *mnt) 889int generic_show_options(struct seq_file *m, struct dentry *root)
862{ 890{
863 const char *options; 891 const char *options;
864 892
865 rcu_read_lock(); 893 rcu_read_lock();
866 options = rcu_dereference(mnt->mnt_sb->s_options); 894 options = rcu_dereference(root->d_sb->s_options);
867 895
868 if (options != NULL && options[0]) { 896 if (options != NULL && options[0]) {
869 seq_putc(m, ','); 897 seq_putc(m, ',');
@@ -907,10 +935,10 @@ void replace_mount_options(struct super_block *sb, char *options)
907EXPORT_SYMBOL(replace_mount_options); 935EXPORT_SYMBOL(replace_mount_options);
908 936
909#ifdef CONFIG_PROC_FS 937#ifdef CONFIG_PROC_FS
910/* iterator */ 938/* iterator; we want it to have access to namespace_sem, thus here... */
911static void *m_start(struct seq_file *m, loff_t *pos) 939static void *m_start(struct seq_file *m, loff_t *pos)
912{ 940{
913 struct proc_mounts *p = m->private; 941 struct proc_mounts *p = container_of(m, struct proc_mounts, m);
914 942
915 down_read(&namespace_sem); 943 down_read(&namespace_sem);
916 return seq_list_start(&p->ns->list, *pos); 944 return seq_list_start(&p->ns->list, *pos);
@@ -918,7 +946,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
918 946
919static void *m_next(struct seq_file *m, void *v, loff_t *pos) 947static void *m_next(struct seq_file *m, void *v, loff_t *pos)
920{ 948{
921 struct proc_mounts *p = m->private; 949 struct proc_mounts *p = container_of(m, struct proc_mounts, m);
922 950
923 return seq_list_next(v, &p->ns->list, pos); 951 return seq_list_next(v, &p->ns->list, pos);
924} 952}
@@ -928,219 +956,18 @@ static void m_stop(struct seq_file *m, void *v)
928 up_read(&namespace_sem); 956 up_read(&namespace_sem);
929} 957}
930 958
931int mnt_had_events(struct proc_mounts *p) 959static int m_show(struct seq_file *m, void *v)
932{ 960{
933 struct mnt_namespace *ns = p->ns; 961 struct proc_mounts *p = container_of(m, struct proc_mounts, m);
934 int res = 0; 962 struct mount *r = list_entry(v, struct mount, mnt_list);
935 963 return p->show(m, &r->mnt);
936 br_read_lock(vfsmount_lock);
937 if (p->m.poll_event != ns->event) {
938 p->m.poll_event = ns->event;
939 res = 1;
940 }
941 br_read_unlock(vfsmount_lock);
942
943 return res;
944}
945
946struct proc_fs_info {
947 int flag;
948 const char *str;
949};
950
951static int show_sb_opts(struct seq_file *m, struct super_block *sb)
952{
953 static const struct proc_fs_info fs_info[] = {
954 { MS_SYNCHRONOUS, ",sync" },
955 { MS_DIRSYNC, ",dirsync" },
956 { MS_MANDLOCK, ",mand" },
957 { 0, NULL }
958 };
959 const struct proc_fs_info *fs_infop;
960
961 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
962 if (sb->s_flags & fs_infop->flag)
963 seq_puts(m, fs_infop->str);
964 }
965
966 return security_sb_show_options(m, sb);
967}
968
969static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
970{
971 static const struct proc_fs_info mnt_info[] = {
972 { MNT_NOSUID, ",nosuid" },
973 { MNT_NODEV, ",nodev" },
974 { MNT_NOEXEC, ",noexec" },
975 { MNT_NOATIME, ",noatime" },
976 { MNT_NODIRATIME, ",nodiratime" },
977 { MNT_RELATIME, ",relatime" },
978 { 0, NULL }
979 };
980 const struct proc_fs_info *fs_infop;
981
982 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
983 if (mnt->mnt_flags & fs_infop->flag)
984 seq_puts(m, fs_infop->str);
985 }
986}
987
988static void show_type(struct seq_file *m, struct super_block *sb)
989{
990 mangle(m, sb->s_type->name);
991 if (sb->s_subtype && sb->s_subtype[0]) {
992 seq_putc(m, '.');
993 mangle(m, sb->s_subtype);
994 }
995}
996
997static int show_vfsmnt(struct seq_file *m, void *v)
998{
999 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
1000 int err = 0;
1001 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
1002
1003 if (mnt->mnt_sb->s_op->show_devname) {
1004 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1005 if (err)
1006 goto out;
1007 } else {
1008 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
1009 }
1010 seq_putc(m, ' ');
1011 seq_path(m, &mnt_path, " \t\n\\");
1012 seq_putc(m, ' ');
1013 show_type(m, mnt->mnt_sb);
1014 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
1015 err = show_sb_opts(m, mnt->mnt_sb);
1016 if (err)
1017 goto out;
1018 show_mnt_opts(m, mnt);
1019 if (mnt->mnt_sb->s_op->show_options)
1020 err = mnt->mnt_sb->s_op->show_options(m, mnt);
1021 seq_puts(m, " 0 0\n");
1022out:
1023 return err;
1024} 964}
1025 965
1026const struct seq_operations mounts_op = { 966const struct seq_operations mounts_op = {
1027 .start = m_start, 967 .start = m_start,
1028 .next = m_next, 968 .next = m_next,
1029 .stop = m_stop, 969 .stop = m_stop,
1030 .show = show_vfsmnt 970 .show = m_show,
1031};
1032
1033static int show_mountinfo(struct seq_file *m, void *v)
1034{
1035 struct proc_mounts *p = m->private;
1036 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
1037 struct super_block *sb = mnt->mnt_sb;
1038 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
1039 struct path root = p->root;
1040 int err = 0;
1041
1042 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
1043 MAJOR(sb->s_dev), MINOR(sb->s_dev));
1044 if (sb->s_op->show_path)
1045 err = sb->s_op->show_path(m, mnt);
1046 else
1047 seq_dentry(m, mnt->mnt_root, " \t\n\\");
1048 if (err)
1049 goto out;
1050 seq_putc(m, ' ');
1051
1052 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
1053 err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
1054 if (err)
1055 goto out;
1056
1057 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
1058 show_mnt_opts(m, mnt);
1059
1060 /* Tagged fields ("foo:X" or "bar") */
1061 if (IS_MNT_SHARED(mnt))
1062 seq_printf(m, " shared:%i", mnt->mnt_group_id);
1063 if (IS_MNT_SLAVE(mnt)) {
1064 int master = mnt->mnt_master->mnt_group_id;
1065 int dom = get_dominating_id(mnt, &p->root);
1066 seq_printf(m, " master:%i", master);
1067 if (dom && dom != master)
1068 seq_printf(m, " propagate_from:%i", dom);
1069 }
1070 if (IS_MNT_UNBINDABLE(mnt))
1071 seq_puts(m, " unbindable");
1072
1073 /* Filesystem specific data */
1074 seq_puts(m, " - ");
1075 show_type(m, sb);
1076 seq_putc(m, ' ');
1077 if (sb->s_op->show_devname)
1078 err = sb->s_op->show_devname(m, mnt);
1079 else
1080 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
1081 if (err)
1082 goto out;
1083 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
1084 err = show_sb_opts(m, sb);
1085 if (err)
1086 goto out;
1087 if (sb->s_op->show_options)
1088 err = sb->s_op->show_options(m, mnt);
1089 seq_putc(m, '\n');
1090out:
1091 return err;
1092}
1093
1094const struct seq_operations mountinfo_op = {
1095 .start = m_start,
1096 .next = m_next,
1097 .stop = m_stop,
1098 .show = show_mountinfo,
1099};
1100
1101static int show_vfsstat(struct seq_file *m, void *v)
1102{
1103 struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
1104 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
1105 int err = 0;
1106
1107 /* device */
1108 if (mnt->mnt_sb->s_op->show_devname) {
1109 seq_puts(m, "device ");
1110 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1111 } else {
1112 if (mnt->mnt_devname) {
1113 seq_puts(m, "device ");
1114 mangle(m, mnt->mnt_devname);
1115 } else
1116 seq_puts(m, "no device");
1117 }
1118
1119 /* mount point */
1120 seq_puts(m, " mounted on ");
1121 seq_path(m, &mnt_path, " \t\n\\");
1122 seq_putc(m, ' ');
1123
1124 /* file system type */
1125 seq_puts(m, "with fstype ");
1126 show_type(m, mnt->mnt_sb);
1127
1128 /* optional statistics */
1129 if (mnt->mnt_sb->s_op->show_stats) {
1130 seq_putc(m, ' ');
1131 if (!err)
1132 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
1133 }
1134
1135 seq_putc(m, '\n');
1136 return err;
1137}
1138
1139const struct seq_operations mountstats_op = {
1140 .start = m_start,
1141 .next = m_next,
1142 .stop = m_stop,
1143 .show = show_vfsstat,
1144}; 971};
1145#endif /* CONFIG_PROC_FS */ 972#endif /* CONFIG_PROC_FS */
1146 973
@@ -1152,11 +979,13 @@ const struct seq_operations mountstats_op = {
1152 * open files, pwds, chroots or sub mounts that are 979 * open files, pwds, chroots or sub mounts that are
1153 * busy. 980 * busy.
1154 */ 981 */
1155int may_umount_tree(struct vfsmount *mnt) 982int may_umount_tree(struct vfsmount *m)
1156{ 983{
984 struct mount *mnt = real_mount(m);
1157 int actual_refs = 0; 985 int actual_refs = 0;
1158 int minimum_refs = 0; 986 int minimum_refs = 0;
1159 struct vfsmount *p; 987 struct mount *p;
988 BUG_ON(!m);
1160 989
1161 /* write lock needed for mnt_get_count */ 990 /* write lock needed for mnt_get_count */
1162 br_write_lock(vfsmount_lock); 991 br_write_lock(vfsmount_lock);
@@ -1192,7 +1021,7 @@ int may_umount(struct vfsmount *mnt)
1192 int ret = 1; 1021 int ret = 1;
1193 down_read(&namespace_sem); 1022 down_read(&namespace_sem);
1194 br_write_lock(vfsmount_lock); 1023 br_write_lock(vfsmount_lock);
1195 if (propagate_mount_busy(mnt, 2)) 1024 if (propagate_mount_busy(real_mount(mnt), 2))
1196 ret = 0; 1025 ret = 0;
1197 br_write_unlock(vfsmount_lock); 1026 br_write_unlock(vfsmount_lock);
1198 up_read(&namespace_sem); 1027 up_read(&namespace_sem);
@@ -1203,25 +1032,25 @@ EXPORT_SYMBOL(may_umount);
1203 1032
1204void release_mounts(struct list_head *head) 1033void release_mounts(struct list_head *head)
1205{ 1034{
1206 struct vfsmount *mnt; 1035 struct mount *mnt;
1207 while (!list_empty(head)) { 1036 while (!list_empty(head)) {
1208 mnt = list_first_entry(head, struct vfsmount, mnt_hash); 1037 mnt = list_first_entry(head, struct mount, mnt_hash);
1209 list_del_init(&mnt->mnt_hash); 1038 list_del_init(&mnt->mnt_hash);
1210 if (mnt->mnt_parent != mnt) { 1039 if (mnt_has_parent(mnt)) {
1211 struct dentry *dentry; 1040 struct dentry *dentry;
1212 struct vfsmount *m; 1041 struct mount *m;
1213 1042
1214 br_write_lock(vfsmount_lock); 1043 br_write_lock(vfsmount_lock);
1215 dentry = mnt->mnt_mountpoint; 1044 dentry = mnt->mnt_mountpoint;
1216 m = mnt->mnt_parent; 1045 m = mnt->mnt_parent;
1217 mnt->mnt_mountpoint = mnt->mnt_root; 1046 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
1218 mnt->mnt_parent = mnt; 1047 mnt->mnt_parent = mnt;
1219 m->mnt_ghosts--; 1048 m->mnt_ghosts--;
1220 br_write_unlock(vfsmount_lock); 1049 br_write_unlock(vfsmount_lock);
1221 dput(dentry); 1050 dput(dentry);
1222 mntput(m); 1051 mntput(&m->mnt);
1223 } 1052 }
1224 mntput(mnt); 1053 mntput(&mnt->mnt);
1225 } 1054 }
1226} 1055}
1227 1056
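
release_mounts() now asks mnt_has_parent() instead of comparing mnt->mnt_parent against mnt itself, and later hunks (umount_tree(), do_move_mount(), pivot_root()) do the same. Presumably the helper is just the old idiom given a name, along the lines of:

static inline int mnt_has_parent(struct mount *mnt)
{
        return mnt != mnt->mnt_parent;
}

The "a detached mount is its own parent" convention is unchanged; the helper only makes the intent readable at each call site.
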
@@ -1229,10 +1058,10 @@ void release_mounts(struct list_head *head)
1229 * vfsmount lock must be held for write 1058 * vfsmount lock must be held for write
1230 * namespace_sem must be held for write 1059 * namespace_sem must be held for write
1231 */ 1060 */
1232void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) 1061void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
1233{ 1062{
1234 LIST_HEAD(tmp_list); 1063 LIST_HEAD(tmp_list);
1235 struct vfsmount *p; 1064 struct mount *p;
1236 1065
1237 for (p = mnt; p; p = next_mnt(p, mnt)) 1066 for (p = mnt; p; p = next_mnt(p, mnt))
1238 list_move(&p->mnt_hash, &tmp_list); 1067 list_move(&p->mnt_hash, &tmp_list);
@@ -1247,24 +1076,24 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
1247 p->mnt_ns = NULL; 1076 p->mnt_ns = NULL;
1248 __mnt_make_shortterm(p); 1077 __mnt_make_shortterm(p);
1249 list_del_init(&p->mnt_child); 1078 list_del_init(&p->mnt_child);
1250 if (p->mnt_parent != p) { 1079 if (mnt_has_parent(p)) {
1251 p->mnt_parent->mnt_ghosts++; 1080 p->mnt_parent->mnt_ghosts++;
1252 dentry_reset_mounted(p->mnt_parent, p->mnt_mountpoint); 1081 dentry_reset_mounted(p->mnt_mountpoint);
1253 } 1082 }
1254 change_mnt_propagation(p, MS_PRIVATE); 1083 change_mnt_propagation(p, MS_PRIVATE);
1255 } 1084 }
1256 list_splice(&tmp_list, kill); 1085 list_splice(&tmp_list, kill);
1257} 1086}
1258 1087
1259static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts); 1088static void shrink_submounts(struct mount *mnt, struct list_head *umounts);
1260 1089
1261static int do_umount(struct vfsmount *mnt, int flags) 1090static int do_umount(struct mount *mnt, int flags)
1262{ 1091{
1263 struct super_block *sb = mnt->mnt_sb; 1092 struct super_block *sb = mnt->mnt.mnt_sb;
1264 int retval; 1093 int retval;
1265 LIST_HEAD(umount_list); 1094 LIST_HEAD(umount_list);
1266 1095
1267 retval = security_sb_umount(mnt, flags); 1096 retval = security_sb_umount(&mnt->mnt, flags);
1268 if (retval) 1097 if (retval)
1269 return retval; 1098 return retval;
1270 1099
@@ -1275,7 +1104,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1275 * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] 1104 * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
1276 */ 1105 */
1277 if (flags & MNT_EXPIRE) { 1106 if (flags & MNT_EXPIRE) {
1278 if (mnt == current->fs->root.mnt || 1107 if (&mnt->mnt == current->fs->root.mnt ||
1279 flags & (MNT_FORCE | MNT_DETACH)) 1108 flags & (MNT_FORCE | MNT_DETACH))
1280 return -EINVAL; 1109 return -EINVAL;
1281 1110
@@ -1317,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1317 * /reboot - static binary that would close all descriptors and 1146 * /reboot - static binary that would close all descriptors and
1318 * call reboot(9). Then init(8) could umount root and exec /reboot. 1147 * call reboot(9). Then init(8) could umount root and exec /reboot.
1319 */ 1148 */
1320 if (mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { 1149 if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
1321 /* 1150 /*
1322 * Special case for "unmounting" root ... 1151 * Special case for "unmounting" root ...
1323 * we just try to remount it readonly. 1152 * we just try to remount it readonly.
@@ -1359,6 +1188,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
1359SYSCALL_DEFINE2(umount, char __user *, name, int, flags) 1188SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1360{ 1189{
1361 struct path path; 1190 struct path path;
1191 struct mount *mnt;
1362 int retval; 1192 int retval;
1363 int lookup_flags = 0; 1193 int lookup_flags = 0;
1364 1194
@@ -1371,21 +1201,22 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
1371 retval = user_path_at(AT_FDCWD, name, lookup_flags, &path); 1201 retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
1372 if (retval) 1202 if (retval)
1373 goto out; 1203 goto out;
1204 mnt = real_mount(path.mnt);
1374 retval = -EINVAL; 1205 retval = -EINVAL;
1375 if (path.dentry != path.mnt->mnt_root) 1206 if (path.dentry != path.mnt->mnt_root)
1376 goto dput_and_out; 1207 goto dput_and_out;
1377 if (!check_mnt(path.mnt)) 1208 if (!check_mnt(mnt))
1378 goto dput_and_out; 1209 goto dput_and_out;
1379 1210
1380 retval = -EPERM; 1211 retval = -EPERM;
1381 if (!capable(CAP_SYS_ADMIN)) 1212 if (!capable(CAP_SYS_ADMIN))
1382 goto dput_and_out; 1213 goto dput_and_out;
1383 1214
1384 retval = do_umount(path.mnt, flags); 1215 retval = do_umount(mnt, flags);
1385dput_and_out: 1216dput_and_out:
1386 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 1217 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
1387 dput(path.dentry); 1218 dput(path.dentry);
1388 mntput_no_expire(path.mnt); 1219 mntput_no_expire(mnt);
1389out: 1220out:
1390 return retval; 1221 return retval;
1391} 1222}
@@ -1420,10 +1251,10 @@ static int mount_is_safe(struct path *path)
1420#endif 1251#endif
1421} 1252}
1422 1253
1423struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, 1254struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1424 int flag) 1255 int flag)
1425{ 1256{
1426 struct vfsmount *res, *p, *q, *r, *s; 1257 struct mount *res, *p, *q, *r;
1427 struct path path; 1258 struct path path;
1428 1259
1429 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1260 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
@@ -1436,6 +1267,7 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1436 1267
1437 p = mnt; 1268 p = mnt;
1438 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 1269 list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
1270 struct mount *s;
1439 if (!is_subdir(r->mnt_mountpoint, dentry)) 1271 if (!is_subdir(r->mnt_mountpoint, dentry))
1440 continue; 1272 continue;
1441 1273
@@ -1449,9 +1281,9 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
1449 q = q->mnt_parent; 1281 q = q->mnt_parent;
1450 } 1282 }
1451 p = s; 1283 p = s;
1452 path.mnt = q; 1284 path.mnt = &q->mnt;
1453 path.dentry = p->mnt_mountpoint; 1285 path.dentry = p->mnt_mountpoint;
1454 q = clone_mnt(p, p->mnt_root, flag); 1286 q = clone_mnt(p, p->mnt.mnt_root, flag);
1455 if (!q) 1287 if (!q)
1456 goto Enomem; 1288 goto Enomem;
1457 br_write_lock(vfsmount_lock); 1289 br_write_lock(vfsmount_lock);
@@ -1474,11 +1306,12 @@ Enomem:
1474 1306
1475struct vfsmount *collect_mounts(struct path *path) 1307struct vfsmount *collect_mounts(struct path *path)
1476{ 1308{
1477 struct vfsmount *tree; 1309 struct mount *tree;
1478 down_write(&namespace_sem); 1310 down_write(&namespace_sem);
1479 tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE); 1311 tree = copy_tree(real_mount(path->mnt), path->dentry,
1312 CL_COPY_ALL | CL_PRIVATE);
1480 up_write(&namespace_sem); 1313 up_write(&namespace_sem);
1481 return tree; 1314 return tree ? &tree->mnt : NULL;
1482} 1315}
1483 1316
1484void drop_collected_mounts(struct vfsmount *mnt) 1317void drop_collected_mounts(struct vfsmount *mnt)
@@ -1486,7 +1319,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
1486 LIST_HEAD(umount_list); 1319 LIST_HEAD(umount_list);
1487 down_write(&namespace_sem); 1320 down_write(&namespace_sem);
1488 br_write_lock(vfsmount_lock); 1321 br_write_lock(vfsmount_lock);
1489 umount_tree(mnt, 0, &umount_list); 1322 umount_tree(real_mount(mnt), 0, &umount_list);
1490 br_write_unlock(vfsmount_lock); 1323 br_write_unlock(vfsmount_lock);
1491 up_write(&namespace_sem); 1324 up_write(&namespace_sem);
1492 release_mounts(&umount_list); 1325 release_mounts(&umount_list);
@@ -1495,21 +1328,21 @@ void drop_collected_mounts(struct vfsmount *mnt)
1495int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 1328int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
1496 struct vfsmount *root) 1329 struct vfsmount *root)
1497{ 1330{
1498 struct vfsmount *mnt; 1331 struct mount *mnt;
1499 int res = f(root, arg); 1332 int res = f(root, arg);
1500 if (res) 1333 if (res)
1501 return res; 1334 return res;
1502 list_for_each_entry(mnt, &root->mnt_list, mnt_list) { 1335 list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
1503 res = f(mnt, arg); 1336 res = f(&mnt->mnt, arg);
1504 if (res) 1337 if (res)
1505 return res; 1338 return res;
1506 } 1339 }
1507 return 0; 1340 return 0;
1508} 1341}
1509 1342
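
iterate_mounts() keeps its vfsmount-based callback even though the walk is now over struct mount internally, so existing callers need no changes. A hedged usage sketch with an invented counting callback:

static int count_one(struct vfsmount *mnt, void *arg)
{
        (*(int *)arg)++;
        return 0;                       /* returning non-zero stops the walk */
}

static int count_tree(struct vfsmount *root)
{
        int n = 0;

        iterate_mounts(count_one, &n, root);
        return n;
}
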
1510static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end) 1343static void cleanup_group_ids(struct mount *mnt, struct mount *end)
1511{ 1344{
1512 struct vfsmount *p; 1345 struct mount *p;
1513 1346
1514 for (p = mnt; p != end; p = next_mnt(p, mnt)) { 1347 for (p = mnt; p != end; p = next_mnt(p, mnt)) {
1515 if (p->mnt_group_id && !IS_MNT_SHARED(p)) 1348 if (p->mnt_group_id && !IS_MNT_SHARED(p))
@@ -1517,9 +1350,9 @@ static void cleanup_group_ids(struct vfsmount *mnt, struct vfsmount *end)
1517 } 1350 }
1518} 1351}
1519 1352
1520static int invent_group_ids(struct vfsmount *mnt, bool recurse) 1353static int invent_group_ids(struct mount *mnt, bool recurse)
1521{ 1354{
1522 struct vfsmount *p; 1355 struct mount *p;
1523 1356
1524 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) { 1357 for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
1525 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) { 1358 if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
@@ -1597,13 +1430,13 @@ static int invent_group_ids(struct vfsmount *mnt, bool recurse)
1597 * Must be called without spinlocks held, since this function can sleep 1430 * Must be called without spinlocks held, since this function can sleep
1598 * in allocations. 1431 * in allocations.
1599 */ 1432 */
1600static int attach_recursive_mnt(struct vfsmount *source_mnt, 1433static int attach_recursive_mnt(struct mount *source_mnt,
1601 struct path *path, struct path *parent_path) 1434 struct path *path, struct path *parent_path)
1602{ 1435{
1603 LIST_HEAD(tree_list); 1436 LIST_HEAD(tree_list);
1604 struct vfsmount *dest_mnt = path->mnt; 1437 struct mount *dest_mnt = real_mount(path->mnt);
1605 struct dentry *dest_dentry = path->dentry; 1438 struct dentry *dest_dentry = path->dentry;
1606 struct vfsmount *child, *p; 1439 struct mount *child, *p;
1607 int err; 1440 int err;
1608 1441
1609 if (IS_MNT_SHARED(dest_mnt)) { 1442 if (IS_MNT_SHARED(dest_mnt)) {
@@ -1624,7 +1457,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1624 if (parent_path) { 1457 if (parent_path) {
1625 detach_mnt(source_mnt, parent_path); 1458 detach_mnt(source_mnt, parent_path);
1626 attach_mnt(source_mnt, path); 1459 attach_mnt(source_mnt, path);
1627 touch_mnt_namespace(parent_path->mnt->mnt_ns); 1460 touch_mnt_namespace(source_mnt->mnt_ns);
1628 } else { 1461 } else {
1629 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1462 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
1630 commit_tree(source_mnt); 1463 commit_tree(source_mnt);
@@ -1672,13 +1505,13 @@ static void unlock_mount(struct path *path)
1672 mutex_unlock(&path->dentry->d_inode->i_mutex); 1505 mutex_unlock(&path->dentry->d_inode->i_mutex);
1673} 1506}
1674 1507
1675static int graft_tree(struct vfsmount *mnt, struct path *path) 1508static int graft_tree(struct mount *mnt, struct path *path)
1676{ 1509{
1677 if (mnt->mnt_sb->s_flags & MS_NOUSER) 1510 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
1678 return -EINVAL; 1511 return -EINVAL;
1679 1512
1680 if (S_ISDIR(path->dentry->d_inode->i_mode) != 1513 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1681 S_ISDIR(mnt->mnt_root->d_inode->i_mode)) 1514 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
1682 return -ENOTDIR; 1515 return -ENOTDIR;
1683 1516
1684 if (d_unlinked(path->dentry)) 1517 if (d_unlinked(path->dentry))
@@ -1709,7 +1542,8 @@ static int flags_to_propagation_type(int flags)
1709 */ 1542 */
1710static int do_change_type(struct path *path, int flag) 1543static int do_change_type(struct path *path, int flag)
1711{ 1544{
1712 struct vfsmount *m, *mnt = path->mnt; 1545 struct mount *m;
1546 struct mount *mnt = real_mount(path->mnt);
1713 int recurse = flag & MS_REC; 1547 int recurse = flag & MS_REC;
1714 int type; 1548 int type;
1715 int err = 0; 1549 int err = 0;
@@ -1749,7 +1583,7 @@ static int do_loopback(struct path *path, char *old_name,
1749{ 1583{
1750 LIST_HEAD(umount_list); 1584 LIST_HEAD(umount_list);
1751 struct path old_path; 1585 struct path old_path;
1752 struct vfsmount *mnt = NULL; 1586 struct mount *mnt = NULL, *old;
1753 int err = mount_is_safe(path); 1587 int err = mount_is_safe(path);
1754 if (err) 1588 if (err)
1755 return err; 1589 return err;
@@ -1763,18 +1597,20 @@ static int do_loopback(struct path *path, char *old_name,
1763 if (err) 1597 if (err)
1764 goto out; 1598 goto out;
1765 1599
1600 old = real_mount(old_path.mnt);
1601
1766 err = -EINVAL; 1602 err = -EINVAL;
1767 if (IS_MNT_UNBINDABLE(old_path.mnt)) 1603 if (IS_MNT_UNBINDABLE(old))
1768 goto out2; 1604 goto out2;
1769 1605
1770 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1606 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old))
1771 goto out2; 1607 goto out2;
1772 1608
1773 err = -ENOMEM; 1609 err = -ENOMEM;
1774 if (recurse) 1610 if (recurse)
1775 mnt = copy_tree(old_path.mnt, old_path.dentry, 0); 1611 mnt = copy_tree(old, old_path.dentry, 0);
1776 else 1612 else
1777 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); 1613 mnt = clone_mnt(old, old_path.dentry, 0);
1778 1614
1779 if (!mnt) 1615 if (!mnt)
1780 goto out2; 1616 goto out2;
@@ -1804,9 +1640,9 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1804 return 0; 1640 return 0;
1805 1641
1806 if (readonly_request) 1642 if (readonly_request)
1807 error = mnt_make_readonly(mnt); 1643 error = mnt_make_readonly(real_mount(mnt));
1808 else 1644 else
1809 __mnt_unmake_readonly(mnt); 1645 __mnt_unmake_readonly(real_mount(mnt));
1810 return error; 1646 return error;
1811} 1647}
1812 1648
@@ -1820,11 +1656,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1820{ 1656{
1821 int err; 1657 int err;
1822 struct super_block *sb = path->mnt->mnt_sb; 1658 struct super_block *sb = path->mnt->mnt_sb;
1659 struct mount *mnt = real_mount(path->mnt);
1823 1660
1824 if (!capable(CAP_SYS_ADMIN)) 1661 if (!capable(CAP_SYS_ADMIN))
1825 return -EPERM; 1662 return -EPERM;
1826 1663
1827 if (!check_mnt(path->mnt)) 1664 if (!check_mnt(mnt))
1828 return -EINVAL; 1665 return -EINVAL;
1829 1666
1830 if (path->dentry != path->mnt->mnt_root) 1667 if (path->dentry != path->mnt->mnt_root)
@@ -1841,22 +1678,22 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1841 err = do_remount_sb(sb, flags, data, 0); 1678 err = do_remount_sb(sb, flags, data, 0);
1842 if (!err) { 1679 if (!err) {
1843 br_write_lock(vfsmount_lock); 1680 br_write_lock(vfsmount_lock);
1844 mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; 1681 mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
1845 path->mnt->mnt_flags = mnt_flags; 1682 mnt->mnt.mnt_flags = mnt_flags;
1846 br_write_unlock(vfsmount_lock); 1683 br_write_unlock(vfsmount_lock);
1847 } 1684 }
1848 up_write(&sb->s_umount); 1685 up_write(&sb->s_umount);
1849 if (!err) { 1686 if (!err) {
1850 br_write_lock(vfsmount_lock); 1687 br_write_lock(vfsmount_lock);
1851 touch_mnt_namespace(path->mnt->mnt_ns); 1688 touch_mnt_namespace(mnt->mnt_ns);
1852 br_write_unlock(vfsmount_lock); 1689 br_write_unlock(vfsmount_lock);
1853 } 1690 }
1854 return err; 1691 return err;
1855} 1692}
1856 1693
1857static inline int tree_contains_unbindable(struct vfsmount *mnt) 1694static inline int tree_contains_unbindable(struct mount *mnt)
1858{ 1695{
1859 struct vfsmount *p; 1696 struct mount *p;
1860 for (p = mnt; p; p = next_mnt(p, mnt)) { 1697 for (p = mnt; p; p = next_mnt(p, mnt)) {
1861 if (IS_MNT_UNBINDABLE(p)) 1698 if (IS_MNT_UNBINDABLE(p))
1862 return 1; 1699 return 1;
@@ -1867,7 +1704,8 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt)
1867static int do_move_mount(struct path *path, char *old_name) 1704static int do_move_mount(struct path *path, char *old_name)
1868{ 1705{
1869 struct path old_path, parent_path; 1706 struct path old_path, parent_path;
1870 struct vfsmount *p; 1707 struct mount *p;
1708 struct mount *old;
1871 int err = 0; 1709 int err = 0;
1872 if (!capable(CAP_SYS_ADMIN)) 1710 if (!capable(CAP_SYS_ADMIN))
1873 return -EPERM; 1711 return -EPERM;
@@ -1881,8 +1719,11 @@ static int do_move_mount(struct path *path, char *old_name)
1881 if (err < 0) 1719 if (err < 0)
1882 goto out; 1720 goto out;
1883 1721
1722 old = real_mount(old_path.mnt);
1723 p = real_mount(path->mnt);
1724
1884 err = -EINVAL; 1725 err = -EINVAL;
1885 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1726 if (!check_mnt(p) || !check_mnt(old))
1886 goto out1; 1727 goto out1;
1887 1728
1888 if (d_unlinked(path->dentry)) 1729 if (d_unlinked(path->dentry))
@@ -1892,7 +1733,7 @@ static int do_move_mount(struct path *path, char *old_name)
1892 if (old_path.dentry != old_path.mnt->mnt_root) 1733 if (old_path.dentry != old_path.mnt->mnt_root)
1893 goto out1; 1734 goto out1;
1894 1735
1895 if (old_path.mnt == old_path.mnt->mnt_parent) 1736 if (!mnt_has_parent(old))
1896 goto out1; 1737 goto out1;
1897 1738
1898 if (S_ISDIR(path->dentry->d_inode->i_mode) != 1739 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
@@ -1901,28 +1742,26 @@ static int do_move_mount(struct path *path, char *old_name)
1901 /* 1742 /*
1902 * Don't move a mount residing in a shared parent. 1743 * Don't move a mount residing in a shared parent.
1903 */ 1744 */
1904 if (old_path.mnt->mnt_parent && 1745 if (IS_MNT_SHARED(old->mnt_parent))
1905 IS_MNT_SHARED(old_path.mnt->mnt_parent))
1906 goto out1; 1746 goto out1;
1907 /* 1747 /*
1908 * Don't move a mount tree containing unbindable mounts to a destination 1748 * Don't move a mount tree containing unbindable mounts to a destination
1909 * mount which is shared. 1749 * mount which is shared.
1910 */ 1750 */
1911 if (IS_MNT_SHARED(path->mnt) && 1751 if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
1912 tree_contains_unbindable(old_path.mnt))
1913 goto out1; 1752 goto out1;
1914 err = -ELOOP; 1753 err = -ELOOP;
1915 for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent) 1754 for (; mnt_has_parent(p); p = p->mnt_parent)
1916 if (p == old_path.mnt) 1755 if (p == old)
1917 goto out1; 1756 goto out1;
1918 1757
1919 err = attach_recursive_mnt(old_path.mnt, path, &parent_path); 1758 err = attach_recursive_mnt(old, path, &parent_path);
1920 if (err) 1759 if (err)
1921 goto out1; 1760 goto out1;
1922 1761
1923 /* if the mount is moved, it should no longer be expire 1762 /* if the mount is moved, it should no longer be expire
1924 * automatically */ 1763 * automatically */
1925 list_del_init(&old_path.mnt->mnt_expire); 1764 list_del_init(&old->mnt_expire);
1926out1: 1765out1:
1927 unlock_mount(path); 1766 unlock_mount(path);
1928out: 1767out:
@@ -1955,7 +1794,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1955 return ERR_PTR(err); 1794 return ERR_PTR(err);
1956} 1795}
1957 1796
1958struct vfsmount * 1797static struct vfsmount *
1959do_kern_mount(const char *fstype, int flags, const char *name, void *data) 1798do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1960{ 1799{
1961 struct file_system_type *type = get_fs_type(fstype); 1800 struct file_system_type *type = get_fs_type(fstype);
@@ -1969,12 +1808,11 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1969 put_filesystem(type); 1808 put_filesystem(type);
1970 return mnt; 1809 return mnt;
1971} 1810}
1972EXPORT_SYMBOL_GPL(do_kern_mount);
1973 1811
1974/* 1812/*
1975 * add a mount into a namespace's mount tree 1813 * add a mount into a namespace's mount tree
1976 */ 1814 */
1977static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) 1815static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1978{ 1816{
1979 int err; 1817 int err;
1980 1818
@@ -1985,20 +1823,20 @@ static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flag
1985 return err; 1823 return err;
1986 1824
1987 err = -EINVAL; 1825 err = -EINVAL;
1988 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt)) 1826 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(real_mount(path->mnt)))
1989 goto unlock; 1827 goto unlock;
1990 1828
1991 /* Refuse the same filesystem on the same mount point */ 1829 /* Refuse the same filesystem on the same mount point */
1992 err = -EBUSY; 1830 err = -EBUSY;
1993 if (path->mnt->mnt_sb == newmnt->mnt_sb && 1831 if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
1994 path->mnt->mnt_root == path->dentry) 1832 path->mnt->mnt_root == path->dentry)
1995 goto unlock; 1833 goto unlock;
1996 1834
1997 err = -EINVAL; 1835 err = -EINVAL;
1998 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) 1836 if (S_ISLNK(newmnt->mnt.mnt_root->d_inode->i_mode))
1999 goto unlock; 1837 goto unlock;
2000 1838
2001 newmnt->mnt_flags = mnt_flags; 1839 newmnt->mnt.mnt_flags = mnt_flags;
2002 err = graft_tree(newmnt, path); 1840 err = graft_tree(newmnt, path);
2003 1841
2004unlock: 1842unlock:
@@ -2027,7 +1865,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
2027 if (IS_ERR(mnt)) 1865 if (IS_ERR(mnt))
2028 return PTR_ERR(mnt); 1866 return PTR_ERR(mnt);
2029 1867
2030 err = do_add_mount(mnt, path, mnt_flags); 1868 err = do_add_mount(real_mount(mnt), path, mnt_flags);
2031 if (err) 1869 if (err)
2032 mntput(mnt); 1870 mntput(mnt);
2033 return err; 1871 return err;
@@ -2035,11 +1873,12 @@ static int do_new_mount(struct path *path, char *type, int flags,
2035 1873
2036int finish_automount(struct vfsmount *m, struct path *path) 1874int finish_automount(struct vfsmount *m, struct path *path)
2037{ 1875{
1876 struct mount *mnt = real_mount(m);
2038 int err; 1877 int err;
2039 /* The new mount record should have at least 2 refs to prevent it being 1878 /* The new mount record should have at least 2 refs to prevent it being
2040 * expired before we get a chance to add it 1879 * expired before we get a chance to add it
2041 */ 1880 */
2042 BUG_ON(mnt_get_count(m) < 2); 1881 BUG_ON(mnt_get_count(mnt) < 2);
2043 1882
2044 if (m->mnt_sb == path->mnt->mnt_sb && 1883 if (m->mnt_sb == path->mnt->mnt_sb &&
2045 m->mnt_root == path->dentry) { 1884 m->mnt_root == path->dentry) {
@@ -2047,15 +1886,15 @@ int finish_automount(struct vfsmount *m, struct path *path)
2047 goto fail; 1886 goto fail;
2048 } 1887 }
2049 1888
2050 err = do_add_mount(m, path, path->mnt->mnt_flags | MNT_SHRINKABLE); 1889 err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
2051 if (!err) 1890 if (!err)
2052 return 0; 1891 return 0;
2053fail: 1892fail:
2054 /* remove m from any expiration list it may be on */ 1893 /* remove m from any expiration list it may be on */
2055 if (!list_empty(&m->mnt_expire)) { 1894 if (!list_empty(&mnt->mnt_expire)) {
2056 down_write(&namespace_sem); 1895 down_write(&namespace_sem);
2057 br_write_lock(vfsmount_lock); 1896 br_write_lock(vfsmount_lock);
2058 list_del_init(&m->mnt_expire); 1897 list_del_init(&mnt->mnt_expire);
2059 br_write_unlock(vfsmount_lock); 1898 br_write_unlock(vfsmount_lock);
2060 up_write(&namespace_sem); 1899 up_write(&namespace_sem);
2061 } 1900 }
@@ -2074,7 +1913,7 @@ void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2074 down_write(&namespace_sem); 1913 down_write(&namespace_sem);
2075 br_write_lock(vfsmount_lock); 1914 br_write_lock(vfsmount_lock);
2076 1915
2077 list_add_tail(&mnt->mnt_expire, expiry_list); 1916 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2078 1917
2079 br_write_unlock(vfsmount_lock); 1918 br_write_unlock(vfsmount_lock);
2080 up_write(&namespace_sem); 1919 up_write(&namespace_sem);
@@ -2088,7 +1927,7 @@ EXPORT_SYMBOL(mnt_set_expiry);
2088 */ 1927 */
2089void mark_mounts_for_expiry(struct list_head *mounts) 1928void mark_mounts_for_expiry(struct list_head *mounts)
2090{ 1929{
2091 struct vfsmount *mnt, *next; 1930 struct mount *mnt, *next;
2092 LIST_HEAD(graveyard); 1931 LIST_HEAD(graveyard);
2093 LIST_HEAD(umounts); 1932 LIST_HEAD(umounts);
2094 1933
@@ -2111,7 +1950,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2111 list_move(&mnt->mnt_expire, &graveyard); 1950 list_move(&mnt->mnt_expire, &graveyard);
2112 } 1951 }
2113 while (!list_empty(&graveyard)) { 1952 while (!list_empty(&graveyard)) {
2114 mnt = list_first_entry(&graveyard, struct vfsmount, mnt_expire); 1953 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2115 touch_mnt_namespace(mnt->mnt_ns); 1954 touch_mnt_namespace(mnt->mnt_ns);
2116 umount_tree(mnt, 1, &umounts); 1955 umount_tree(mnt, 1, &umounts);
2117 } 1956 }
@@ -2129,9 +1968,9 @@ EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
2129 * search the list of submounts for a given mountpoint, and move any 1968 * search the list of submounts for a given mountpoint, and move any
2130 * shrinkable submounts to the 'graveyard' list. 1969 * shrinkable submounts to the 'graveyard' list.
2131 */ 1970 */
2132static int select_submounts(struct vfsmount *parent, struct list_head *graveyard) 1971static int select_submounts(struct mount *parent, struct list_head *graveyard)
2133{ 1972{
2134 struct vfsmount *this_parent = parent; 1973 struct mount *this_parent = parent;
2135 struct list_head *next; 1974 struct list_head *next;
2136 int found = 0; 1975 int found = 0;
2137 1976
@@ -2140,10 +1979,10 @@ repeat:
2140resume: 1979resume:
2141 while (next != &this_parent->mnt_mounts) { 1980 while (next != &this_parent->mnt_mounts) {
2142 struct list_head *tmp = next; 1981 struct list_head *tmp = next;
2143 struct vfsmount *mnt = list_entry(tmp, struct vfsmount, mnt_child); 1982 struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
2144 1983
2145 next = tmp->next; 1984 next = tmp->next;
2146 if (!(mnt->mnt_flags & MNT_SHRINKABLE)) 1985 if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
2147 continue; 1986 continue;
2148 /* 1987 /*
2149 * Descend a level if the d_mounts list is non-empty. 1988 * Descend a level if the d_mounts list is non-empty.
@@ -2175,15 +2014,15 @@ resume:
2175 * 2014 *
2176 * vfsmount_lock must be held for write 2015 * vfsmount_lock must be held for write
2177 */ 2016 */
2178static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) 2017static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
2179{ 2018{
2180 LIST_HEAD(graveyard); 2019 LIST_HEAD(graveyard);
2181 struct vfsmount *m; 2020 struct mount *m;
2182 2021
2183 /* extract submounts of 'mountpoint' from the expiration list */ 2022 /* extract submounts of 'mountpoint' from the expiration list */
2184 while (select_submounts(mnt, &graveyard)) { 2023 while (select_submounts(mnt, &graveyard)) {
2185 while (!list_empty(&graveyard)) { 2024 while (!list_empty(&graveyard)) {
2186 m = list_first_entry(&graveyard, struct vfsmount, 2025 m = list_first_entry(&graveyard, struct mount,
2187 mnt_expire); 2026 mnt_expire);
2188 touch_mnt_namespace(m->mnt_ns); 2027 touch_mnt_namespace(m->mnt_ns);
2189 umount_tree(m, 1, umounts); 2028 umount_tree(m, 1, umounts);
@@ -2370,12 +2209,13 @@ static struct mnt_namespace *alloc_mnt_ns(void)
2370 2209
2371void mnt_make_longterm(struct vfsmount *mnt) 2210void mnt_make_longterm(struct vfsmount *mnt)
2372{ 2211{
2373 __mnt_make_longterm(mnt); 2212 __mnt_make_longterm(real_mount(mnt));
2374} 2213}
2375 2214
2376void mnt_make_shortterm(struct vfsmount *mnt) 2215void mnt_make_shortterm(struct vfsmount *m)
2377{ 2216{
2378#ifdef CONFIG_SMP 2217#ifdef CONFIG_SMP
2218 struct mount *mnt = real_mount(m);
2379 if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) 2219 if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
2380 return; 2220 return;
2381 br_write_lock(vfsmount_lock); 2221 br_write_lock(vfsmount_lock);
@@ -2393,7 +2233,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2393{ 2233{
2394 struct mnt_namespace *new_ns; 2234 struct mnt_namespace *new_ns;
2395 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 2235 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
2396 struct vfsmount *p, *q; 2236 struct mount *p, *q;
2237 struct mount *old = mnt_ns->root;
2238 struct mount *new;
2397 2239
2398 new_ns = alloc_mnt_ns(); 2240 new_ns = alloc_mnt_ns();
2399 if (IS_ERR(new_ns)) 2241 if (IS_ERR(new_ns))
@@ -2401,15 +2243,15 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2401 2243
2402 down_write(&namespace_sem); 2244 down_write(&namespace_sem);
2403 /* First pass: copy the tree topology */ 2245 /* First pass: copy the tree topology */
2404 new_ns->root = copy_tree(mnt_ns->root, mnt_ns->root->mnt_root, 2246 new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE);
2405 CL_COPY_ALL | CL_EXPIRE); 2247 if (!new) {
2406 if (!new_ns->root) {
2407 up_write(&namespace_sem); 2248 up_write(&namespace_sem);
2408 kfree(new_ns); 2249 kfree(new_ns);
2409 return ERR_PTR(-ENOMEM); 2250 return ERR_PTR(-ENOMEM);
2410 } 2251 }
2252 new_ns->root = new;
2411 br_write_lock(vfsmount_lock); 2253 br_write_lock(vfsmount_lock);
2412 list_add_tail(&new_ns->list, &new_ns->root->mnt_list); 2254 list_add_tail(&new_ns->list, &new->mnt_list);
2413 br_write_unlock(vfsmount_lock); 2255 br_write_unlock(vfsmount_lock);
2414 2256
2415 /* 2257 /*
@@ -2417,27 +2259,27 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2417 * as belonging to new namespace. We have already acquired a private 2259 * as belonging to new namespace. We have already acquired a private
2418 * fs_struct, so tsk->fs->lock is not needed. 2260 * fs_struct, so tsk->fs->lock is not needed.
2419 */ 2261 */
2420 p = mnt_ns->root; 2262 p = old;
2421 q = new_ns->root; 2263 q = new;
2422 while (p) { 2264 while (p) {
2423 q->mnt_ns = new_ns; 2265 q->mnt_ns = new_ns;
2424 __mnt_make_longterm(q); 2266 __mnt_make_longterm(q);
2425 if (fs) { 2267 if (fs) {
2426 if (p == fs->root.mnt) { 2268 if (&p->mnt == fs->root.mnt) {
2427 fs->root.mnt = mntget(q); 2269 fs->root.mnt = mntget(&q->mnt);
2428 __mnt_make_longterm(q); 2270 __mnt_make_longterm(q);
2429 mnt_make_shortterm(p); 2271 mnt_make_shortterm(&p->mnt);
2430 rootmnt = p; 2272 rootmnt = &p->mnt;
2431 } 2273 }
2432 if (p == fs->pwd.mnt) { 2274 if (&p->mnt == fs->pwd.mnt) {
2433 fs->pwd.mnt = mntget(q); 2275 fs->pwd.mnt = mntget(&q->mnt);
2434 __mnt_make_longterm(q); 2276 __mnt_make_longterm(q);
2435 mnt_make_shortterm(p); 2277 mnt_make_shortterm(&p->mnt);
2436 pwdmnt = p; 2278 pwdmnt = &p->mnt;
2437 } 2279 }
2438 } 2280 }
2439 p = next_mnt(p, mnt_ns->root); 2281 p = next_mnt(p, old);
2440 q = next_mnt(q, new_ns->root); 2282 q = next_mnt(q, new);
2441 } 2283 }
2442 up_write(&namespace_sem); 2284 up_write(&namespace_sem);
2443 2285
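
The second pass above relies on copy_tree() producing a tree with exactly the same shape as the original, so walking both with next_mnt() in lockstep keeps p and q pointing at corresponding mounts while the fs->root and fs->pwd references are switched over. The pattern, reduced to its skeleton:

        p = old;                        /* root of the original tree */
        q = new;                        /* root of the copy */
        while (p) {
                /* p and q are the same mount in the two trees; fix up q here */
                p = next_mnt(p, old);
                q = next_mnt(q, new);
        }
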
@@ -2470,22 +2312,20 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
2470 * create_mnt_ns - creates a private namespace and adds a root filesystem 2312 * create_mnt_ns - creates a private namespace and adds a root filesystem
2471 * @mnt: pointer to the new root filesystem mountpoint 2313 * @mnt: pointer to the new root filesystem mountpoint
2472 */ 2314 */
2473struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) 2315static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
2474{ 2316{
2475 struct mnt_namespace *new_ns; 2317 struct mnt_namespace *new_ns = alloc_mnt_ns();
2476
2477 new_ns = alloc_mnt_ns();
2478 if (!IS_ERR(new_ns)) { 2318 if (!IS_ERR(new_ns)) {
2319 struct mount *mnt = real_mount(m);
2479 mnt->mnt_ns = new_ns; 2320 mnt->mnt_ns = new_ns;
2480 __mnt_make_longterm(mnt); 2321 __mnt_make_longterm(mnt);
2481 new_ns->root = mnt; 2322 new_ns->root = mnt;
2482 list_add(&new_ns->list, &new_ns->root->mnt_list); 2323 list_add(&new_ns->list, &mnt->mnt_list);
2483 } else { 2324 } else {
2484 mntput(mnt); 2325 mntput(m);
2485 } 2326 }
2486 return new_ns; 2327 return new_ns;
2487} 2328}
2488EXPORT_SYMBOL(create_mnt_ns);
2489 2329
2490struct dentry *mount_subtree(struct vfsmount *mnt, const char *name) 2330struct dentry *mount_subtree(struct vfsmount *mnt, const char *name)
2491{ 2331{
@@ -2559,6 +2399,31 @@ out_type:
2559} 2399}
2560 2400
2561/* 2401/*
2402 * Return true if path is reachable from root
2403 *
2404 * namespace_sem or vfsmount_lock is held
2405 */
2406bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
2407 const struct path *root)
2408{
2409 while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
2410 dentry = mnt->mnt_mountpoint;
2411 mnt = mnt->mnt_parent;
2412 }
2413 return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
2414}
2415
2416int path_is_under(struct path *path1, struct path *path2)
2417{
2418 int res;
2419 br_read_lock(vfsmount_lock);
2420 res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
2421 br_read_unlock(vfsmount_lock);
2422 return res;
2423}
2424EXPORT_SYMBOL(path_is_under);
2425
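
is_path_reachable() climbs mnt_mountpoint/mnt_parent until it either runs out of parents or reaches root->mnt, and path_is_under() is the locked wrapper exported to the rest of the kernel. A hedged usage sketch, assuming both paths have already been resolved (e.g. with kern_path()):

/* true if "inner" can be reached by walking down from "outer" */
static bool example_is_nested(struct path *inner, struct path *outer)
{
        return path_is_under(inner, outer) != 0;
}

pivot_root() below drops its hand-rolled parent walk in favour of a single is_path_reachable() call for the "can we reach put_old from new_root" check.
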
2426/*
2562 * pivot_root Semantics: 2427 * pivot_root Semantics:
2563 * Moves the root file system of the current process to the directory put_old, 2428 * Moves the root file system of the current process to the directory put_old,
2564 * makes new_root as the new root file system of the current process, and sets 2429 * makes new_root as the new root file system of the current process, and sets
@@ -2586,8 +2451,8 @@ out_type:
2586SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, 2451SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2587 const char __user *, put_old) 2452 const char __user *, put_old)
2588{ 2453{
2589 struct vfsmount *tmp;
2590 struct path new, old, parent_path, root_parent, root; 2454 struct path new, old, parent_path, root_parent, root;
2455 struct mount *new_mnt, *root_mnt;
2591 int error; 2456 int error;
2592 2457
2593 if (!capable(CAP_SYS_ADMIN)) 2458 if (!capable(CAP_SYS_ADMIN))
@@ -2611,11 +2476,13 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2611 goto out3; 2476 goto out3;
2612 2477
2613 error = -EINVAL; 2478 error = -EINVAL;
2614 if (IS_MNT_SHARED(old.mnt) || 2479 new_mnt = real_mount(new.mnt);
2615 IS_MNT_SHARED(new.mnt->mnt_parent) || 2480 root_mnt = real_mount(root.mnt);
2616 IS_MNT_SHARED(root.mnt->mnt_parent)) 2481 if (IS_MNT_SHARED(real_mount(old.mnt)) ||
2482 IS_MNT_SHARED(new_mnt->mnt_parent) ||
2483 IS_MNT_SHARED(root_mnt->mnt_parent))
2617 goto out4; 2484 goto out4;
2618 if (!check_mnt(root.mnt) || !check_mnt(new.mnt)) 2485 if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
2619 goto out4; 2486 goto out4;
2620 error = -ENOENT; 2487 error = -ENOENT;
2621 if (d_unlinked(new.dentry)) 2488 if (d_unlinked(new.dentry))
@@ -2629,33 +2496,22 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2629 error = -EINVAL; 2496 error = -EINVAL;
2630 if (root.mnt->mnt_root != root.dentry) 2497 if (root.mnt->mnt_root != root.dentry)
2631 goto out4; /* not a mountpoint */ 2498 goto out4; /* not a mountpoint */
2632 if (root.mnt->mnt_parent == root.mnt) 2499 if (!mnt_has_parent(root_mnt))
2633 goto out4; /* not attached */ 2500 goto out4; /* not attached */
2634 if (new.mnt->mnt_root != new.dentry) 2501 if (new.mnt->mnt_root != new.dentry)
2635 goto out4; /* not a mountpoint */ 2502 goto out4; /* not a mountpoint */
2636 if (new.mnt->mnt_parent == new.mnt) 2503 if (!mnt_has_parent(new_mnt))
2637 goto out4; /* not attached */ 2504 goto out4; /* not attached */
2638 /* make sure we can reach put_old from new_root */ 2505 /* make sure we can reach put_old from new_root */
2639 tmp = old.mnt; 2506 if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new))
2640 if (tmp != new.mnt) {
2641 for (;;) {
2642 if (tmp->mnt_parent == tmp)
2643 goto out4; /* already mounted on put_old */
2644 if (tmp->mnt_parent == new.mnt)
2645 break;
2646 tmp = tmp->mnt_parent;
2647 }
2648 if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
2649 goto out4;
2650 } else if (!is_subdir(old.dentry, new.dentry))
2651 goto out4; 2507 goto out4;
2652 br_write_lock(vfsmount_lock); 2508 br_write_lock(vfsmount_lock);
2653 detach_mnt(new.mnt, &parent_path); 2509 detach_mnt(new_mnt, &parent_path);
2654 detach_mnt(root.mnt, &root_parent); 2510 detach_mnt(root_mnt, &root_parent);
2655 /* mount old root on put_old */ 2511 /* mount old root on put_old */
2656 attach_mnt(root.mnt, &old); 2512 attach_mnt(root_mnt, &old);
2657 /* mount new_root on / */ 2513 /* mount new_root on / */
2658 attach_mnt(new.mnt, &root_parent); 2514 attach_mnt(new_mnt, &root_parent);
2659 touch_mnt_namespace(current->nsproxy->mnt_ns); 2515 touch_mnt_namespace(current->nsproxy->mnt_ns);
2660 br_write_unlock(vfsmount_lock); 2516 br_write_unlock(vfsmount_lock);
2661 chroot_fs_refs(&root, &new); 2517 chroot_fs_refs(&root, &new);
@@ -2693,8 +2549,8 @@ static void __init init_mount_tree(void)
2693 init_task.nsproxy->mnt_ns = ns; 2549 init_task.nsproxy->mnt_ns = ns;
2694 get_mnt_ns(ns); 2550 get_mnt_ns(ns);
2695 2551
2696 root.mnt = ns->root; 2552 root.mnt = mnt;
2697 root.dentry = ns->root->mnt_root; 2553 root.dentry = mnt->mnt_root;
2698 2554
2699 set_fs_pwd(current->fs, &root); 2555 set_fs_pwd(current->fs, &root);
2700 set_fs_root(current->fs, &root); 2556 set_fs_root(current->fs, &root);
@@ -2707,7 +2563,7 @@ void __init mnt_init(void)
2707 2563
2708 init_rwsem(&namespace_sem); 2564 init_rwsem(&namespace_sem);
2709 2565
2710 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount), 2566 mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
2711 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2567 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2712 2568
2713 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); 2569 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
@@ -2747,7 +2603,6 @@ void put_mnt_ns(struct mnt_namespace *ns)
2747 release_mounts(&umount_list); 2603 release_mounts(&umount_list);
2748 kfree(ns); 2604 kfree(ns);
2749} 2605}
2750EXPORT_SYMBOL(put_mnt_ns);
2751 2606
2752struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) 2607struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
2753{ 2608{
@@ -2776,5 +2631,5 @@ EXPORT_SYMBOL(kern_unmount);
2776 2631
2777bool our_mnt(struct vfsmount *mnt) 2632bool our_mnt(struct vfsmount *mnt)
2778{ 2633{
2779 return check_mnt(mnt); 2634 return check_mnt(real_mount(mnt));
2780} 2635}
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 9c51f621e901..aeed93a6bde0 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -30,15 +30,15 @@ static void ncp_do_readdir(struct file *, void *, filldir_t,
30 30
31static int ncp_readdir(struct file *, void *, filldir_t); 31static int ncp_readdir(struct file *, void *, filldir_t);
32 32
33static int ncp_create(struct inode *, struct dentry *, int, struct nameidata *); 33static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
34static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); 34static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *);
35static int ncp_unlink(struct inode *, struct dentry *); 35static int ncp_unlink(struct inode *, struct dentry *);
36static int ncp_mkdir(struct inode *, struct dentry *, int); 36static int ncp_mkdir(struct inode *, struct dentry *, umode_t);
37static int ncp_rmdir(struct inode *, struct dentry *); 37static int ncp_rmdir(struct inode *, struct dentry *);
38static int ncp_rename(struct inode *, struct dentry *, 38static int ncp_rename(struct inode *, struct dentry *,
39 struct inode *, struct dentry *); 39 struct inode *, struct dentry *);
40static int ncp_mknod(struct inode * dir, struct dentry *dentry, 40static int ncp_mknod(struct inode * dir, struct dentry *dentry,
41 int mode, dev_t rdev); 41 umode_t mode, dev_t rdev);
42#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS) 42#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
43extern int ncp_symlink(struct inode *, struct dentry *, const char *); 43extern int ncp_symlink(struct inode *, struct dentry *, const char *);
44#else 44#else
@@ -919,7 +919,7 @@ out_close:
919 goto out; 919 goto out;
920} 920}
921 921
922int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode, 922int ncp_create_new(struct inode *dir, struct dentry *dentry, umode_t mode,
923 dev_t rdev, __le32 attributes) 923 dev_t rdev, __le32 attributes)
924{ 924{
925 struct ncp_server *server = NCP_SERVER(dir); 925 struct ncp_server *server = NCP_SERVER(dir);
@@ -928,7 +928,7 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode,
928 int opmode; 928 int opmode;
929 __u8 __name[NCP_MAXPATHLEN + 1]; 929 __u8 __name[NCP_MAXPATHLEN + 1];
930 930
931 PPRINTK("ncp_create_new: creating %s/%s, mode=%x\n", 931 PPRINTK("ncp_create_new: creating %s/%s, mode=%hx\n",
932 dentry->d_parent->d_name.name, dentry->d_name.name, mode); 932 dentry->d_parent->d_name.name, dentry->d_name.name, mode);
933 933
934 ncp_age_dentry(server, dentry); 934 ncp_age_dentry(server, dentry);
@@ -979,13 +979,13 @@ out:
979 return error; 979 return error;
980} 980}
981 981
982static int ncp_create(struct inode *dir, struct dentry *dentry, int mode, 982static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode,
983 struct nameidata *nd) 983 struct nameidata *nd)
984{ 984{
985 return ncp_create_new(dir, dentry, mode, 0, 0); 985 return ncp_create_new(dir, dentry, mode, 0, 0);
986} 986}
987 987
988static int ncp_mkdir(struct inode *dir, struct dentry *dentry, int mode) 988static int ncp_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
989{ 989{
990 struct ncp_entry_info finfo; 990 struct ncp_entry_info finfo;
991 struct ncp_server *server = NCP_SERVER(dir); 991 struct ncp_server *server = NCP_SERVER(dir);
@@ -1201,12 +1201,12 @@ out:
1201} 1201}
1202 1202
1203static int ncp_mknod(struct inode * dir, struct dentry *dentry, 1203static int ncp_mknod(struct inode * dir, struct dentry *dentry,
1204 int mode, dev_t rdev) 1204 umode_t mode, dev_t rdev)
1205{ 1205{
1206 if (!new_valid_dev(rdev)) 1206 if (!new_valid_dev(rdev))
1207 return -EINVAL; 1207 return -EINVAL;
1208 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) { 1208 if (ncp_is_nfs_extras(NCP_SERVER(dir), NCP_FINFO(dir)->volNumber)) {
1209 DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%o\n", mode); 1209 DPRINTK(KERN_DEBUG "ncp_mknod: mode = 0%ho\n", mode);
1210 return ncp_create_new(dir, dentry, mode, rdev, 0); 1210 return ncp_create_new(dir, dentry, mode, rdev, 0);
1211 } 1211 }
1212 return -EPERM; /* Strange, but true */ 1212 return -EPERM; /* Strange, but true */
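
The int -> umode_t conversions in this file are also why the two printk formats above change (%x -> %hx and 0%o -> 0%ho): umode_t is a short type, so the 'h' length modifier keeps the format string consistent with the argument. For reference, hedged as my reading of the generic type headers rather than anything in this patch:

typedef unsigned short umode_t;         /* from the <linux/types.h> family */

/* so a umode_t argument pairs with %ho / %hx, e.g.: */
printk(KERN_DEBUG "mode = 0%ho\n", mode);
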
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 5b5fa33b6b9d..3d1e34f8a68e 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -44,7 +44,7 @@
44static void ncp_evict_inode(struct inode *); 44static void ncp_evict_inode(struct inode *);
45static void ncp_put_super(struct super_block *); 45static void ncp_put_super(struct super_block *);
46static int ncp_statfs(struct dentry *, struct kstatfs *); 46static int ncp_statfs(struct dentry *, struct kstatfs *);
47static int ncp_show_options(struct seq_file *, struct vfsmount *); 47static int ncp_show_options(struct seq_file *, struct dentry *);
48 48
49static struct kmem_cache * ncp_inode_cachep; 49static struct kmem_cache * ncp_inode_cachep;
50 50
@@ -60,7 +60,6 @@ static struct inode *ncp_alloc_inode(struct super_block *sb)
60static void ncp_i_callback(struct rcu_head *head) 60static void ncp_i_callback(struct rcu_head *head)
61{ 61{
62 struct inode *inode = container_of(head, struct inode, i_rcu); 62 struct inode *inode = container_of(head, struct inode, i_rcu);
63 INIT_LIST_HEAD(&inode->i_dentry);
64 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); 63 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
65} 64}
66 65
@@ -323,9 +322,9 @@ static void ncp_stop_tasks(struct ncp_server *server) {
323 flush_work_sync(&server->timeout_tq); 322 flush_work_sync(&server->timeout_tq);
324} 323}
325 324
326static int ncp_show_options(struct seq_file *seq, struct vfsmount *mnt) 325static int ncp_show_options(struct seq_file *seq, struct dentry *root)
327{ 326{
328 struct ncp_server *server = NCP_SBP(mnt->mnt_sb); 327 struct ncp_server *server = NCP_SBP(root->d_sb);
329 unsigned int tmp; 328 unsigned int tmp;
330 329
331 if (server->m.uid != 0) 330 if (server->m.uid != 0)
@@ -548,7 +547,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
548 547
549 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY); 548 error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
550 if (error) 549 if (error)
551 goto out_bdi; 550 goto out_fput;
552 551
553 server->ncp_filp = ncp_filp; 552 server->ncp_filp = ncp_filp;
554 server->ncp_sock = sock; 553 server->ncp_sock = sock;
@@ -559,7 +558,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
559 error = -EBADF; 558 error = -EBADF;
560 server->info_filp = fget(data.info_fd); 559 server->info_filp = fget(data.info_fd);
561 if (!server->info_filp) 560 if (!server->info_filp)
562 goto out_fput; 561 goto out_bdi;
563 error = -ENOTSOCK; 562 error = -ENOTSOCK;
564 sock_inode = server->info_filp->f_path.dentry->d_inode; 563 sock_inode = server->info_filp->f_path.dentry->d_inode;
565 if (!S_ISSOCK(sock_inode->i_mode)) 564 if (!S_ISSOCK(sock_inode->i_mode))
@@ -746,9 +745,9 @@ out_nls:
746out_fput2: 745out_fput2:
747 if (server->info_filp) 746 if (server->info_filp)
748 fput(server->info_filp); 747 fput(server->info_filp);
749out_fput:
750 bdi_destroy(&server->bdi);
751out_bdi: 748out_bdi:
749 bdi_destroy(&server->bdi);
750out_fput:
752 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>: 751 /* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
753 * 752 *
754 * The previously used put_filp(ncp_filp); was bogus, since 753 * The previously used put_filp(ncp_filp); was bogus, since
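
The out_fput/out_bdi swap above brings the labels back in line with the usual error ladder: each failure jumps just past the cleanup of the step that failed, and the labels then unwind in reverse order of setup. The general shape, with invented resource names:

        err = setup_a();
        if (err)
                goto out;               /* nothing acquired yet */
        err = setup_b();
        if (err)
                goto out_a;             /* undo only a */
        return 0;

out_a:
        teardown_a();
out:
        return err;
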
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 790e92a9ec63..6958adfaff08 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -901,7 +901,7 @@ long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
901 ret = __ncp_ioctl(inode, cmd, arg); 901 ret = __ncp_ioctl(inode, cmd, arg);
902outDropWrite: 902outDropWrite:
903 if (need_drop_write) 903 if (need_drop_write)
904 mnt_drop_write(filp->f_path.mnt); 904 mnt_drop_write_file(filp);
905out: 905out:
906 return ret; 906 return ret;
907} 907}
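
mnt_drop_write_file() is the new file-based convenience used here in place of mnt_drop_write(filp->f_path.mnt); presumably it is nothing more than that expression wrapped up, something like:

void mnt_drop_write_file(struct file *file)
{
        mnt_drop_write(file->f_path.mnt);
}

which saves ioctl handlers from spelling out the f_path.mnt indirection at every drop site.
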
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 09881e6aa5ad..32c06587351a 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -114,7 +114,7 @@ int ncp_dirhandle_alloc(struct ncp_server *, __u8 vol, __le32 dirent, __u8 *dirh
114int ncp_dirhandle_free(struct ncp_server *, __u8 dirhandle); 114int ncp_dirhandle_free(struct ncp_server *, __u8 dirhandle);
115 115
116int ncp_create_new(struct inode *dir, struct dentry *dentry, 116int ncp_create_new(struct inode *dir, struct dentry *dentry,
117 int mode, dev_t rdev, __le32 attributes); 117 umode_t mode, dev_t rdev, __le32 attributes);
118 118
119static inline int ncp_is_nfs_extras(struct ncp_server* server, unsigned int volnum) { 119static inline int ncp_is_nfs_extras(struct ncp_server* server, unsigned int volnum) {
120#ifdef CONFIG_NCPFS_NFS_NS 120#ifdef CONFIG_NCPFS_NFS_NS
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c
index 661f861d80c6..52439ddc8de0 100644
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -108,7 +108,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
108 char *rawlink; 108 char *rawlink;
109 int length, err, i, outlen; 109 int length, err, i, outlen;
110 int kludge; 110 int kludge;
111 int mode; 111 umode_t mode;
112 __le32 attr; 112 __le32 attr;
113 unsigned int hdr; 113 unsigned int hdr;
114 114
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac2899098147..fd9a872fada0 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -47,13 +47,13 @@ static int nfs_opendir(struct inode *, struct file *);
47static int nfs_closedir(struct inode *, struct file *); 47static int nfs_closedir(struct inode *, struct file *);
48static int nfs_readdir(struct file *, void *, filldir_t); 48static int nfs_readdir(struct file *, void *, filldir_t);
49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); 49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
50static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); 50static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *);
51static int nfs_mkdir(struct inode *, struct dentry *, int); 51static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
52static int nfs_rmdir(struct inode *, struct dentry *); 52static int nfs_rmdir(struct inode *, struct dentry *);
53static int nfs_unlink(struct inode *, struct dentry *); 53static int nfs_unlink(struct inode *, struct dentry *);
54static int nfs_symlink(struct inode *, struct dentry *, const char *); 54static int nfs_symlink(struct inode *, struct dentry *, const char *);
55static int nfs_link(struct dentry *, struct inode *, struct dentry *); 55static int nfs_link(struct dentry *, struct inode *, struct dentry *);
56static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); 56static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
57static int nfs_rename(struct inode *, struct dentry *, 57static int nfs_rename(struct inode *, struct dentry *,
58 struct inode *, struct dentry *); 58 struct inode *, struct dentry *);
59static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); 59static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
@@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = {
112#ifdef CONFIG_NFS_V4 112#ifdef CONFIG_NFS_V4
113 113
114static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); 114static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
115static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd); 115static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd);
116const struct inode_operations nfs4_dir_inode_operations = { 116const struct inode_operations nfs4_dir_inode_operations = {
117 .create = nfs_open_create, 117 .create = nfs_open_create,
118 .lookup = nfs_atomic_lookup, 118 .lookup = nfs_atomic_lookup,
@@ -1368,18 +1368,7 @@ static fmode_t flags_to_mode(int flags)
1368 1368
1369static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags) 1369static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
1370{ 1370{
1371 struct nfs_open_context *ctx; 1371 return alloc_nfs_open_context(dentry, flags_to_mode(open_flags));
1372 struct rpc_cred *cred;
1373 fmode_t fmode = flags_to_mode(open_flags);
1374
1375 cred = rpc_lookup_cred();
1376 if (IS_ERR(cred))
1377 return ERR_CAST(cred);
1378 ctx = alloc_nfs_open_context(dentry, cred, fmode);
1379 put_rpccred(cred);
1380 if (ctx == NULL)
1381 return ERR_PTR(-ENOMEM);
1382 return ctx;
1383} 1372}
1384 1373
1385static int do_open(struct inode *inode, struct file *filp) 1374static int do_open(struct inode *inode, struct file *filp)
@@ -1584,8 +1573,8 @@ no_open:
1584 return nfs_lookup_revalidate(dentry, nd); 1573 return nfs_lookup_revalidate(dentry, nd);
1585} 1574}
1586 1575
1587static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, 1576static int nfs_open_create(struct inode *dir, struct dentry *dentry,
1588 struct nameidata *nd) 1577 umode_t mode, struct nameidata *nd)
1589{ 1578{
1590 struct nfs_open_context *ctx = NULL; 1579 struct nfs_open_context *ctx = NULL;
1591 struct iattr attr; 1580 struct iattr attr;
@@ -1675,8 +1664,8 @@ out_error:
1675 * that the operation succeeded on the server, but an error in the 1664 * that the operation succeeded on the server, but an error in the
1676 * reply path made it appear to have failed. 1665 * reply path made it appear to have failed.
1677 */ 1666 */
1678static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, 1667static int nfs_create(struct inode *dir, struct dentry *dentry,
1679 struct nameidata *nd) 1668 umode_t mode, struct nameidata *nd)
1680{ 1669{
1681 struct iattr attr; 1670 struct iattr attr;
1682 int error; 1671 int error;
@@ -1704,7 +1693,7 @@ out_err:
1704 * See comments for nfs_proc_create regarding failed operations. 1693 * See comments for nfs_proc_create regarding failed operations.
1705 */ 1694 */
1706static int 1695static int
1707nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 1696nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
1708{ 1697{
1709 struct iattr attr; 1698 struct iattr attr;
1710 int status; 1699 int status;
@@ -1730,7 +1719,7 @@ out_err:
1730/* 1719/*
1731 * See comments for nfs_proc_create regarding failed operations. 1720 * See comments for nfs_proc_create regarding failed operations.
1732 */ 1721 */
1733static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1722static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1734{ 1723{
1735 struct iattr attr; 1724 struct iattr attr;
1736 int error; 1725 int error;
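
With the dir.c hunks above, create_nfs_open_context() becomes a thin forwarder and the failure contract changes from NULL to an ERR_PTR() value. A minimal, self-contained sketch of that convention under hypothetical names (only the IS_ERR()/PTR_ERR()/ERR_PTR() usage is taken from the diff):

    #include <linux/err.h>
    #include <linux/slab.h>

    struct example_ctx { int flags; };

    /* Hypothetical factory following the same convention: never return NULL,
     * always return either a valid pointer or an ERR_PTR() value. */
    static struct example_ctx *example_alloc_ctx(int flags)
    {
            struct example_ctx *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);

            if (!ctx)
                    return ERR_PTR(-ENOMEM);
            ctx->flags = flags;
            return ctx;
    }

    /* Callers now check IS_ERR(), not NULL, and propagate the encoded errno. */
    static int example_use_ctx(int flags)
    {
            struct example_ctx *ctx = example_alloc_ctx(flags);

            if (IS_ERR(ctx))
                    return PTR_ERR(ctx);
            kfree(ctx);
            return 0;
    }
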
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index eca56d4b39c0..606ef0f20aed 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -147,7 +147,7 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
147 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate 147 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
148 * the cached file length 148 * the cached file length
149 */ 149 */
150 if (origin != SEEK_SET || origin != SEEK_CUR) { 150 if (origin != SEEK_SET && origin != SEEK_CUR) {
151 struct inode *inode = filp->f_mapping->host; 151 struct inode *inode = filp->f_mapping->host;
152 152
153 int retval = nfs_revalidate_file_size(inode, filp); 153 int retval = nfs_revalidate_file_size(inode, filp);
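
The llseek hunk above fixes a boolean-logic bug: "origin != SEEK_SET || origin != SEEK_CUR" is true for every origin, since no value equals both constants, so the revalidation branch ran unconditionally. A small sketch spelling out the corrected condition (the helper name is hypothetical):

    #include <linux/fs.h>       /* SEEK_SET, SEEK_CUR */
    #include <linux/types.h>

    /* Hypothetical helper: revalidate only when the origin is neither
     * SEEK_SET nor SEEK_CUR, i.e. SEEK_END, SEEK_DATA or SEEK_HOLE. */
    static bool example_needs_revalidate(int origin)
    {
            return origin != SEEK_SET && origin != SEEK_CUR;
            /* equivalently: !(origin == SEEK_SET || origin == SEEK_CUR) */
    }
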
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a15fa8cf98..81db25e92e10 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -38,6 +38,7 @@
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h> 40#include <linux/compat.h>
41#include <linux/freezer.h>
41 42
42#include <asm/system.h> 43#include <asm/system.h>
43#include <asm/uaccess.h> 44#include <asm/uaccess.h>
@@ -77,7 +78,7 @@ int nfs_wait_bit_killable(void *word)
77{ 78{
78 if (fatal_signal_pending(current)) 79 if (fatal_signal_pending(current))
79 return -ERESTARTSYS; 80 return -ERESTARTSYS;
80 schedule(); 81 freezable_schedule();
81 return 0; 82 return 0;
82} 83}
83 84
@@ -629,23 +630,28 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
629 nfs_revalidate_inode(server, inode); 630 nfs_revalidate_inode(server, inode);
630} 631}
631 632
632struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode) 633struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode)
633{ 634{
634 struct nfs_open_context *ctx; 635 struct nfs_open_context *ctx;
636 struct rpc_cred *cred = rpc_lookup_cred();
637 if (IS_ERR(cred))
638 return ERR_CAST(cred);
635 639
636 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 640 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
637 if (ctx != NULL) { 641 if (!ctx) {
638 nfs_sb_active(dentry->d_sb); 642 put_rpccred(cred);
639 ctx->dentry = dget(dentry); 643 return ERR_PTR(-ENOMEM);
640 ctx->cred = get_rpccred(cred);
641 ctx->state = NULL;
642 ctx->mode = f_mode;
643 ctx->flags = 0;
644 ctx->error = 0;
645 nfs_init_lock_context(&ctx->lock_context);
646 ctx->lock_context.open_context = ctx;
647 INIT_LIST_HEAD(&ctx->list);
648 } 644 }
645 nfs_sb_active(dentry->d_sb);
646 ctx->dentry = dget(dentry);
647 ctx->cred = cred;
648 ctx->state = NULL;
649 ctx->mode = f_mode;
650 ctx->flags = 0;
651 ctx->error = 0;
652 nfs_init_lock_context(&ctx->lock_context);
653 ctx->lock_context.open_context = ctx;
654 INIT_LIST_HEAD(&ctx->list);
649 return ctx; 655 return ctx;
650} 656}
651 657
@@ -738,15 +744,10 @@ static void nfs_file_clear_open_context(struct file *filp)
738int nfs_open(struct inode *inode, struct file *filp) 744int nfs_open(struct inode *inode, struct file *filp)
739{ 745{
740 struct nfs_open_context *ctx; 746 struct nfs_open_context *ctx;
741 struct rpc_cred *cred;
742 747
743 cred = rpc_lookup_cred(); 748 ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
744 if (IS_ERR(cred)) 749 if (IS_ERR(ctx))
745 return PTR_ERR(cred); 750 return PTR_ERR(ctx);
746 ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
747 put_rpccred(cred);
748 if (ctx == NULL)
749 return -ENOMEM;
750 nfs_file_set_open_context(filp, ctx); 751 nfs_file_set_open_context(filp, ctx);
751 put_nfs_open_context(ctx); 752 put_nfs_open_context(ctx);
752 nfs_fscache_set_inode_cookie(inode, filp); 753 nfs_fscache_set_inode_cookie(inode, filp);
@@ -1464,7 +1465,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
1464static void nfs_i_callback(struct rcu_head *head) 1465static void nfs_i_callback(struct rcu_head *head)
1465{ 1466{
1466 struct inode *inode = container_of(head, struct inode, i_rcu); 1467 struct inode *inode = container_of(head, struct inode, i_rcu);
1467 INIT_LIST_HEAD(&inode->i_dentry);
1468 kmem_cache_free(nfs_inode_cachep, NFS_I(inode)); 1468 kmem_cache_free(nfs_inode_cachep, NFS_I(inode));
1469} 1469}
1470 1470
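
The inode.c hunk moves the credential lookup into alloc_nfs_open_context() itself, so the context owns the cred and the allocation-failure path must release it. A minimal sketch of that acquire/allocate/unwind ordering under hypothetical names; only rpc_lookup_cred(), put_rpccred() and the ERR_PTR convention come from the diff:

    #include <linux/err.h>
    #include <linux/slab.h>
    #include <linux/sunrpc/auth.h>

    struct example_obj { struct rpc_cred *cred; };

    /* Hypothetical constructor: look up the credential first, then allocate;
     * if the allocation fails, drop the credential before returning. */
    static struct example_obj *example_new(void)
    {
            struct rpc_cred *cred = rpc_lookup_cred();
            struct example_obj *obj;

            if (IS_ERR(cred))
                    return ERR_CAST(cred);          /* propagate the cred error */

            obj = kmalloc(sizeof(*obj), GFP_KERNEL);
            if (!obj) {
                    put_rpccred(cred);              /* unwind the earlier lookup */
                    return ERR_PTR(-ENOMEM);
            }
            obj->cred = cred;                       /* object now owns the reference */
            return obj;
    }
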
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d4bc9ed91748..91943953a370 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -17,6 +17,7 @@
17#include <linux/nfs_page.h> 17#include <linux/nfs_page.h>
18#include <linux/lockd/bind.h> 18#include <linux/lockd/bind.h>
19#include <linux/nfs_mount.h> 19#include <linux/nfs_mount.h>
20#include <linux/freezer.h>
20 21
21#include "iostat.h" 22#include "iostat.h"
22#include "internal.h" 23#include "internal.h"
@@ -32,7 +33,7 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
32 res = rpc_call_sync(clnt, msg, flags); 33 res = rpc_call_sync(clnt, msg, flags);
33 if (res != -EJUKEBOX && res != -EKEYEXPIRED) 34 if (res != -EJUKEBOX && res != -EKEYEXPIRED)
34 break; 35 break;
35 schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); 36 freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
36 res = -ERESTARTSYS; 37 res = -ERESTARTSYS;
37 } while (!fatal_signal_pending(current)); 38 } while (!fatal_signal_pending(current));
38 return res; 39 return res;
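
The nfs3proc.c hunk swaps schedule_timeout_killable() for its freezer-aware variant so a task stuck retrying against a slow server no longer blocks suspend. A hedged sketch of the retry shape, with a hypothetical operation and a stand-in error code:

    #include <linux/errno.h>
    #include <linux/freezer.h>
    #include <linux/sched.h>

    /* Hypothetical retry wrapper mirroring the hunk: sleep between attempts
     * in a freezable, killable state and give up on a fatal signal. */
    static int example_retry(int (*op)(void *), void *arg, long delay)
    {
            int res;

            do {
                    res = op(arg);
                    if (res != -EAGAIN)             /* stand-in for -EJUKEBOX/-EKEYEXPIRED */
                            break;
                    freezable_schedule_timeout_killable(delay);
                    res = -ERESTARTSYS;
            } while (!fatal_signal_pending(current));
            return res;
    }
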
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be2bbac13817..dcda0ba7af60 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -39,6 +39,8 @@
39#include <linux/delay.h> 39#include <linux/delay.h>
40#include <linux/errno.h> 40#include <linux/errno.h>
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/ratelimit.h>
43#include <linux/printk.h>
42#include <linux/slab.h> 44#include <linux/slab.h>
43#include <linux/sunrpc/clnt.h> 45#include <linux/sunrpc/clnt.h>
44#include <linux/sunrpc/gss_api.h> 46#include <linux/sunrpc/gss_api.h>
@@ -53,6 +55,7 @@
53#include <linux/sunrpc/bc_xprt.h> 55#include <linux/sunrpc/bc_xprt.h>
54#include <linux/xattr.h> 56#include <linux/xattr.h>
55#include <linux/utsname.h> 57#include <linux/utsname.h>
58#include <linux/freezer.h>
56 59
57#include "nfs4_fs.h" 60#include "nfs4_fs.h"
58#include "delegation.h" 61#include "delegation.h"
@@ -241,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
241 *timeout = NFS4_POLL_RETRY_MIN; 244 *timeout = NFS4_POLL_RETRY_MIN;
242 if (*timeout > NFS4_POLL_RETRY_MAX) 245 if (*timeout > NFS4_POLL_RETRY_MAX)
243 *timeout = NFS4_POLL_RETRY_MAX; 246 *timeout = NFS4_POLL_RETRY_MAX;
244 schedule_timeout_killable(*timeout); 247 freezable_schedule_timeout_killable(*timeout);
245 if (fatal_signal_pending(current)) 248 if (fatal_signal_pending(current))
246 res = -ERESTARTSYS; 249 res = -ERESTARTSYS;
247 *timeout <<= 1; 250 *timeout <<= 1;
@@ -894,6 +897,8 @@ out:
894 897
895static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode) 898static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
896{ 899{
900 if (delegation == NULL)
901 return 0;
897 if ((delegation->type & fmode) != fmode) 902 if ((delegation->type & fmode) != fmode)
898 return 0; 903 return 0;
899 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags)) 904 if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
@@ -1036,8 +1041,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
1036 } 1041 }
1037 rcu_read_lock(); 1042 rcu_read_lock();
1038 delegation = rcu_dereference(nfsi->delegation); 1043 delegation = rcu_dereference(nfsi->delegation);
1039 if (delegation == NULL || 1044 if (!can_open_delegated(delegation, fmode)) {
1040 !can_open_delegated(delegation, fmode)) {
1041 rcu_read_unlock(); 1045 rcu_read_unlock();
1042 break; 1046 break;
1043 } 1047 }
@@ -1091,7 +1095,12 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1091 if (delegation) 1095 if (delegation)
1092 delegation_flags = delegation->flags; 1096 delegation_flags = delegation->flags;
1093 rcu_read_unlock(); 1097 rcu_read_unlock();
1094 if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) 1098 if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
1099 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1100 "returning a delegation for "
1101 "OPEN(CLAIM_DELEGATE_CUR)\n",
1102 NFS_CLIENT(inode)->cl_server);
1103 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1095 nfs_inode_set_delegation(state->inode, 1104 nfs_inode_set_delegation(state->inode,
1096 data->owner->so_cred, 1105 data->owner->so_cred,
1097 &data->o_res); 1106 &data->o_res);
@@ -1423,11 +1432,9 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1423 goto out_no_action; 1432 goto out_no_action;
1424 rcu_read_lock(); 1433 rcu_read_lock();
1425 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); 1434 delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
1426 if (delegation != NULL && 1435 if (data->o_arg.claim != NFS4_OPEN_CLAIM_DELEGATE_CUR &&
1427 test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) { 1436 can_open_delegated(delegation, data->o_arg.fmode))
1428 rcu_read_unlock(); 1437 goto unlock_no_action;
1429 goto out_no_action;
1430 }
1431 rcu_read_unlock(); 1438 rcu_read_unlock();
1432 } 1439 }
1433 /* Update sequence id. */ 1440 /* Update sequence id. */
@@ -1444,6 +1451,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1444 return; 1451 return;
1445 rpc_call_start(task); 1452 rpc_call_start(task);
1446 return; 1453 return;
1454unlock_no_action:
1455 rcu_read_unlock();
1447out_no_action: 1456out_no_action:
1448 task->tk_action = NULL; 1457 task->tk_action = NULL;
1449 1458
@@ -3950,7 +3959,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
3950static unsigned long 3959static unsigned long
3951nfs4_set_lock_task_retry(unsigned long timeout) 3960nfs4_set_lock_task_retry(unsigned long timeout)
3952{ 3961{
3953 schedule_timeout_killable(timeout); 3962 freezable_schedule_timeout_killable(timeout);
3954 timeout <<= 1; 3963 timeout <<= 1;
3955 if (timeout > NFS4_LOCK_MAXTIMEOUT) 3964 if (timeout > NFS4_LOCK_MAXTIMEOUT)
3956 return NFS4_LOCK_MAXTIMEOUT; 3965 return NFS4_LOCK_MAXTIMEOUT;
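
Part of the nfs4proc.c hunk makes can_open_delegated() tolerate a NULL delegation, so callers holding rcu_read_lock() can pass the rcu_dereference() result straight in without their own NULL check. A small sketch of the same predicate style under hypothetical names:

    #include <linux/types.h>

    struct example_delegation { unsigned int type; };

    /* Hypothetical predicate in the style of the hunk: NULL simply means
     * "no usable delegation", so callers need no separate NULL check. */
    static bool example_can_use(const struct example_delegation *d, unsigned int fmode)
    {
            if (d == NULL)
                    return false;
            return (d->type & fmode) == fmode;
    }
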
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 39914be40b03..6a7107ae6b72 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1156,11 +1156,13 @@ restart:
1156 if (status >= 0) { 1156 if (status >= 0) {
1157 status = nfs4_reclaim_locks(state, ops); 1157 status = nfs4_reclaim_locks(state, ops);
1158 if (status >= 0) { 1158 if (status >= 0) {
1159 spin_lock(&state->state_lock);
1159 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1160 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1160 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 1161 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
1161 printk("%s: Lock reclaim failed!\n", 1162 printk("%s: Lock reclaim failed!\n",
1162 __func__); 1163 __func__);
1163 } 1164 }
1165 spin_unlock(&state->state_lock);
1164 nfs4_put_open_state(state); 1166 nfs4_put_open_state(state);
1165 goto restart; 1167 goto restart;
1166 } 1168 }
@@ -1224,10 +1226,12 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
1224 clear_bit(NFS_O_RDONLY_STATE, &state->flags); 1226 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1225 clear_bit(NFS_O_WRONLY_STATE, &state->flags); 1227 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1226 clear_bit(NFS_O_RDWR_STATE, &state->flags); 1228 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1229 spin_lock(&state->state_lock);
1227 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1230 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1228 lock->ls_seqid.flags = 0; 1231 lock->ls_seqid.flags = 0;
1229 lock->ls_flags &= ~NFS_LOCK_INITIALIZED; 1232 lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
1230 } 1233 }
1234 spin_unlock(&state->state_lock);
1231} 1235}
1232 1236
1233static void nfs4_reset_seqids(struct nfs_server *server, 1237static void nfs4_reset_seqids(struct nfs_server *server,
@@ -1350,12 +1354,14 @@ static void nfs4_warn_keyexpired(const char *s)
1350static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) 1354static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1351{ 1355{
1352 switch (error) { 1356 switch (error) {
1357 case 0:
1358 break;
1353 case -NFS4ERR_CB_PATH_DOWN: 1359 case -NFS4ERR_CB_PATH_DOWN:
1354 nfs_handle_cb_pathdown(clp); 1360 nfs_handle_cb_pathdown(clp);
1355 return 0; 1361 break;
1356 case -NFS4ERR_NO_GRACE: 1362 case -NFS4ERR_NO_GRACE:
1357 nfs4_state_end_reclaim_reboot(clp); 1363 nfs4_state_end_reclaim_reboot(clp);
1358 return 0; 1364 break;
1359 case -NFS4ERR_STALE_CLIENTID: 1365 case -NFS4ERR_STALE_CLIENTID:
1360 case -NFS4ERR_LEASE_MOVED: 1366 case -NFS4ERR_LEASE_MOVED:
1361 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1367 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
@@ -1375,13 +1381,15 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1375 case -NFS4ERR_SEQ_MISORDERED: 1381 case -NFS4ERR_SEQ_MISORDERED:
1376 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1382 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1377 /* Zero session reset errors */ 1383 /* Zero session reset errors */
1378 return 0; 1384 break;
1379 case -EKEYEXPIRED: 1385 case -EKEYEXPIRED:
1380 /* Nothing we can do */ 1386 /* Nothing we can do */
1381 nfs4_warn_keyexpired(clp->cl_hostname); 1387 nfs4_warn_keyexpired(clp->cl_hostname);
1382 return 0; 1388 break;
1389 default:
1390 return error;
1383 } 1391 }
1384 return error; 1392 return 0;
1385} 1393}
1386 1394
1387static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) 1395static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
@@ -1428,7 +1436,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
1428 struct rpc_cred *cred; 1436 struct rpc_cred *cred;
1429 const struct nfs4_state_maintenance_ops *ops = 1437 const struct nfs4_state_maintenance_ops *ops =
1430 clp->cl_mvops->state_renewal_ops; 1438 clp->cl_mvops->state_renewal_ops;
1431 int status = -NFS4ERR_EXPIRED; 1439 int status;
1432 1440
1433 /* Is the client already known to have an expired lease? */ 1441 /* Is the client already known to have an expired lease? */
1434 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1442 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
@@ -1438,6 +1446,7 @@ static int nfs4_check_lease(struct nfs_client *clp)
1438 spin_unlock(&clp->cl_lock); 1446 spin_unlock(&clp->cl_lock);
1439 if (cred == NULL) { 1447 if (cred == NULL) {
1440 cred = nfs4_get_setclientid_cred(clp); 1448 cred = nfs4_get_setclientid_cred(clp);
1449 status = -ENOKEY;
1441 if (cred == NULL) 1450 if (cred == NULL)
1442 goto out; 1451 goto out;
1443 } 1452 }
@@ -1525,16 +1534,16 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
1525{ 1534{
1526 if (!flags) 1535 if (!flags)
1527 return; 1536 return;
1528 else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) 1537 if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
1529 nfs41_handle_server_reboot(clp); 1538 nfs41_handle_server_reboot(clp);
1530 else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | 1539 if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
1531 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | 1540 SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
1532 SEQ4_STATUS_ADMIN_STATE_REVOKED | 1541 SEQ4_STATUS_ADMIN_STATE_REVOKED |
1533 SEQ4_STATUS_LEASE_MOVED)) 1542 SEQ4_STATUS_LEASE_MOVED))
1534 nfs41_handle_state_revoked(clp); 1543 nfs41_handle_state_revoked(clp);
1535 else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) 1544 if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
1536 nfs41_handle_recallable_state_revoked(clp); 1545 nfs41_handle_recallable_state_revoked(clp);
1537 else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | 1546 if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
1538 SEQ4_STATUS_BACKCHANNEL_FAULT | 1547 SEQ4_STATUS_BACKCHANNEL_FAULT |
1539 SEQ4_STATUS_CB_PATH_DOWN_SESSION)) 1548 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
1540 nfs41_handle_cb_path_down(clp); 1549 nfs41_handle_cb_path_down(clp);
@@ -1662,10 +1671,10 @@ static void nfs4_state_manager(struct nfs_client *clp)
1662 1671
1663 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1672 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
1664 status = nfs4_check_lease(clp); 1673 status = nfs4_check_lease(clp);
1674 if (status < 0)
1675 goto out_error;
1665 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) 1676 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1666 continue; 1677 continue;
1667 if (status < 0 && status != -NFS4ERR_CB_PATH_DOWN)
1668 goto out_error;
1669 } 1678 }
1670 1679
1671 /* Initialize or reset the session */ 1680 /* Initialize or reset the session */
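
The nfs4state.c hunk reshapes nfs4_recovery_handle_error() so every recognised error breaks out of the switch and collapses to 0, while anything unrecognised is returned unchanged from the default branch. A generic sketch of that demultiplexer shape, with hypothetical error cases:

    #include <linux/errno.h>

    /* Hypothetical error demultiplexer shaped like the rewritten handler:
     * handled errors fall through to "return 0", unknown ones pass back. */
    static int example_handle_error(int error)
    {
            switch (error) {
            case 0:
                    break;
            case -EAGAIN:
                    /* ... schedule a retry ... */
                    break;
            case -ESTALE:
                    /* ... mark state for recovery ... */
                    break;
            default:
                    return error;   /* let the caller decide */
            }
            return 0;
    }
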
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f48125da198a..0c672588fe5a 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -41,6 +41,7 @@
41#include <linux/nfs_fs.h> 41#include <linux/nfs_fs.h>
42#include <linux/nfs_page.h> 42#include <linux/nfs_page.h>
43#include <linux/lockd/bind.h> 43#include <linux/lockd/bind.h>
44#include <linux/freezer.h>
44#include "internal.h" 45#include "internal.h"
45 46
46#define NFSDBG_FACILITY NFSDBG_PROC 47#define NFSDBG_FACILITY NFSDBG_PROC
@@ -59,7 +60,7 @@ nfs_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
59 res = rpc_call_sync(clnt, msg, flags); 60 res = rpc_call_sync(clnt, msg, flags);
60 if (res != -EKEYEXPIRED) 61 if (res != -EKEYEXPIRED)
61 break; 62 break;
62 schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME); 63 freezable_schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
63 res = -ERESTARTSYS; 64 res = -ERESTARTSYS;
64 } while (!fatal_signal_pending(current)); 65 } while (!fatal_signal_pending(current));
65 return res; 66 return res;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 134777406ee3..e463967aafb8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -41,7 +41,6 @@
41#include <linux/lockd/bind.h> 41#include <linux/lockd/bind.h>
42#include <linux/seq_file.h> 42#include <linux/seq_file.h>
43#include <linux/mount.h> 43#include <linux/mount.h>
44#include <linux/mnt_namespace.h>
45#include <linux/namei.h> 44#include <linux/namei.h>
46#include <linux/nfs_idmap.h> 45#include <linux/nfs_idmap.h>
47#include <linux/vfs.h> 46#include <linux/vfs.h>
@@ -263,10 +262,10 @@ static match_table_t nfs_local_lock_tokens = {
263 262
264static void nfs_umount_begin(struct super_block *); 263static void nfs_umount_begin(struct super_block *);
265static int nfs_statfs(struct dentry *, struct kstatfs *); 264static int nfs_statfs(struct dentry *, struct kstatfs *);
266static int nfs_show_options(struct seq_file *, struct vfsmount *); 265static int nfs_show_options(struct seq_file *, struct dentry *);
267static int nfs_show_devname(struct seq_file *, struct vfsmount *); 266static int nfs_show_devname(struct seq_file *, struct dentry *);
268static int nfs_show_path(struct seq_file *, struct vfsmount *); 267static int nfs_show_path(struct seq_file *, struct dentry *);
269static int nfs_show_stats(struct seq_file *, struct vfsmount *); 268static int nfs_show_stats(struct seq_file *, struct dentry *);
270static struct dentry *nfs_fs_mount(struct file_system_type *, 269static struct dentry *nfs_fs_mount(struct file_system_type *,
271 int, const char *, void *); 270 int, const char *, void *);
272static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 271static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
@@ -721,9 +720,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
721/* 720/*
722 * Describe the mount options on this VFS mountpoint 721 * Describe the mount options on this VFS mountpoint
723 */ 722 */
724static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) 723static int nfs_show_options(struct seq_file *m, struct dentry *root)
725{ 724{
726 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); 725 struct nfs_server *nfss = NFS_SB(root->d_sb);
727 726
728 nfs_show_mount_options(m, nfss, 0); 727 nfs_show_mount_options(m, nfss, 0);
729 728
@@ -761,14 +760,14 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) {}
761#endif 760#endif
762#endif 761#endif
763 762
764static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) 763static int nfs_show_devname(struct seq_file *m, struct dentry *root)
765{ 764{
766 char *page = (char *) __get_free_page(GFP_KERNEL); 765 char *page = (char *) __get_free_page(GFP_KERNEL);
767 char *devname, *dummy; 766 char *devname, *dummy;
768 int err = 0; 767 int err = 0;
769 if (!page) 768 if (!page)
770 return -ENOMEM; 769 return -ENOMEM;
771 devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE); 770 devname = nfs_path(&dummy, root, page, PAGE_SIZE);
772 if (IS_ERR(devname)) 771 if (IS_ERR(devname))
773 err = PTR_ERR(devname); 772 err = PTR_ERR(devname);
774 else 773 else
@@ -777,7 +776,7 @@ static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
777 return err; 776 return err;
778} 777}
779 778
780static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt) 779static int nfs_show_path(struct seq_file *m, struct dentry *dentry)
781{ 780{
782 seq_puts(m, "/"); 781 seq_puts(m, "/");
783 return 0; 782 return 0;
@@ -786,10 +785,10 @@ static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
786/* 785/*
787 * Present statistical information for this VFS mountpoint 786 * Present statistical information for this VFS mountpoint
788 */ 787 */
789static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) 788static int nfs_show_stats(struct seq_file *m, struct dentry *root)
790{ 789{
791 int i, cpu; 790 int i, cpu;
792 struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); 791 struct nfs_server *nfss = NFS_SB(root->d_sb);
793 struct rpc_auth *auth = nfss->client->cl_auth; 792 struct rpc_auth *auth = nfss->client->cl_auth;
794 struct nfs_iostats totals = { }; 793 struct nfs_iostats totals = { };
795 794
@@ -799,10 +798,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
799 * Display all mount option settings 798 * Display all mount option settings
800 */ 799 */
801 seq_printf(m, "\n\topts:\t"); 800 seq_printf(m, "\n\topts:\t");
802 seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); 801 seq_puts(m, root->d_sb->s_flags & MS_RDONLY ? "ro" : "rw");
803 seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); 802 seq_puts(m, root->d_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
804 seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); 803 seq_puts(m, root->d_sb->s_flags & MS_NOATIME ? ",noatime" : "");
805 seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : ""); 804 seq_puts(m, root->d_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
806 nfs_show_mount_options(m, nfss, 1); 805 nfs_show_mount_options(m, nfss, 1);
807 806
808 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); 807 seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
@@ -2788,11 +2787,15 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2788 const char *export_path) 2787 const char *export_path)
2789{ 2788{
2790 struct dentry *dentry; 2789 struct dentry *dentry;
2791 int ret = nfs_referral_loop_protect(); 2790 int err;
2792 2791
2793 if (ret) { 2792 if (IS_ERR(root_mnt))
2793 return ERR_CAST(root_mnt);
2794
2795 err = nfs_referral_loop_protect();
2796 if (err) {
2794 mntput(root_mnt); 2797 mntput(root_mnt);
2795 return ERR_PTR(ret); 2798 return ERR_PTR(err);
2796 } 2799 }
2797 2800
2798 dentry = mount_subtree(root_mnt, export_path); 2801 dentry = mount_subtree(root_mnt, export_path);
@@ -2816,9 +2819,7 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2816 data->nfs_server.hostname); 2819 data->nfs_server.hostname);
2817 data->nfs_server.export_path = export_path; 2820 data->nfs_server.export_path = export_path;
2818 2821
2819 res = ERR_CAST(root_mnt); 2822 res = nfs_follow_remote_path(root_mnt, export_path);
2820 if (!IS_ERR(root_mnt))
2821 res = nfs_follow_remote_path(root_mnt, export_path);
2822 2823
2823 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", 2824 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
2824 IS_ERR(res) ? PTR_ERR(res) : 0, 2825 IS_ERR(res) ? PTR_ERR(res) : 0,
@@ -3079,9 +3080,7 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
3079 flags, data, data->hostname); 3080 flags, data, data->hostname);
3080 data->mnt_path = export_path; 3081 data->mnt_path = export_path;
3081 3082
3082 res = ERR_CAST(root_mnt); 3083 res = nfs_follow_remote_path(root_mnt, export_path);
3083 if (!IS_ERR(root_mnt))
3084 res = nfs_follow_remote_path(root_mnt, export_path);
3085 dprintk("<-- nfs4_referral_mount() = %ld%s\n", 3084 dprintk("<-- nfs4_referral_mount() = %ld%s\n",
3086 IS_ERR(res) ? PTR_ERR(res) : 0, 3085 IS_ERR(res) ? PTR_ERR(res) : 0,
3087 IS_ERR(res) ? " [error]" : ""); 3086 IS_ERR(res) ? " [error]" : "");
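
The super.c hunks above follow the prototype change that hands ->show_options and friends a struct dentry *root instead of a struct vfsmount, with the superblock reached through root->d_sb. A minimal sketch matching the new prototypes in the hunk (the function name and the single option emitted are hypothetical):

    #include <linux/fs.h>
    #include <linux/seq_file.h>

    /* Hypothetical ->show_options with the dentry-based signature. */
    static int example_show_options(struct seq_file *m, struct dentry *root)
    {
            struct super_block *sb = root->d_sb;    /* was mnt->mnt_sb */

            if (sb->s_flags & MS_SYNCHRONOUS)
                    seq_puts(m, ",sync");
            return 0;
    }
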
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index fa383361bc61..c5e28ed8bca0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -838,7 +838,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
838 return status; 838 return status;
839 } 839 }
840 } 840 }
841 status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt); 841 status = fh_want_write(&cstate->current_fh);
842 if (status) 842 if (status)
843 return status; 843 return status;
844 status = nfs_ok; 844 status = nfs_ok;
@@ -856,7 +856,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
856 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 856 status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr,
857 0, (time_t)0); 857 0, (time_t)0);
858out: 858out:
859 mnt_drop_write(cstate->current_fh.fh_export->ex_path.mnt); 859 fh_drop_write(&cstate->current_fh);
860 return status; 860 return status;
861} 861}
862 862
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ed083b9a731b..80a0be9ed008 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -147,11 +147,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
147 status = -EEXIST; 147 status = -EEXIST;
148 if (dentry->d_inode) 148 if (dentry->d_inode)
149 goto out_put; 149 goto out_put;
150 status = mnt_want_write(rec_file->f_path.mnt); 150 status = mnt_want_write_file(rec_file);
151 if (status) 151 if (status)
152 goto out_put; 152 goto out_put;
153 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU); 153 status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
154 mnt_drop_write(rec_file->f_path.mnt); 154 mnt_drop_write_file(rec_file);
155out_put: 155out_put:
156 dput(dentry); 156 dput(dentry);
157out_unlock: 157out_unlock:
@@ -268,7 +268,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
268 if (!rec_file || !clp->cl_firststate) 268 if (!rec_file || !clp->cl_firststate)
269 return; 269 return;
270 270
271 status = mnt_want_write(rec_file->f_path.mnt); 271 status = mnt_want_write_file(rec_file);
272 if (status) 272 if (status)
273 goto out; 273 goto out;
274 clp->cl_firststate = 0; 274 clp->cl_firststate = 0;
@@ -281,7 +281,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
281 nfs4_reset_creds(original_cred); 281 nfs4_reset_creds(original_cred);
282 if (status == 0) 282 if (status == 0)
283 vfs_fsync(rec_file, 0); 283 vfs_fsync(rec_file, 0);
284 mnt_drop_write(rec_file->f_path.mnt); 284 mnt_drop_write_file(rec_file);
285out: 285out:
286 if (status) 286 if (status)
287 printk("NFSD: Failed to remove expired client state directory" 287 printk("NFSD: Failed to remove expired client state directory"
@@ -311,13 +311,13 @@ nfsd4_recdir_purge_old(void) {
311 311
312 if (!rec_file) 312 if (!rec_file)
313 return; 313 return;
314 status = mnt_want_write(rec_file->f_path.mnt); 314 status = mnt_want_write_file(rec_file);
315 if (status) 315 if (status)
316 goto out; 316 goto out;
317 status = nfsd4_list_rec_dir(purge_old); 317 status = nfsd4_list_rec_dir(purge_old);
318 if (status == 0) 318 if (status == 0)
319 vfs_fsync(rec_file, 0); 319 vfs_fsync(rec_file, 0);
320 mnt_drop_write(rec_file->f_path.mnt); 320 mnt_drop_write_file(rec_file);
321out: 321out:
322 if (status) 322 if (status)
323 printk("nfsd4: failed to purge old clients from recovery" 323 printk("nfsd4: failed to purge old clients from recovery"
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 47e94e33a975..9ca16dc09e04 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -658,7 +658,7 @@ static int nfsd4_sanitize_slot_size(u32 size)
658/* 658/*
659 * XXX: If we run out of reserved DRC memory we could (up to a point) 659 * XXX: If we run out of reserved DRC memory we could (up to a point)
660 * re-negotiate active sessions and reduce their slot usage to make 660 * re-negotiate active sessions and reduce their slot usage to make
661 * rooom for new connections. For now we just fail the create session. 661 * room for new connections. For now we just fail the create session.
662 */ 662 */
663static int nfsd4_get_drc_mem(int slotsize, u32 num) 663static int nfsd4_get_drc_mem(int slotsize, u32 num)
664{ 664{
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c45a2ea4a090..bb4a11d58a5a 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -272,7 +272,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
272 * 2. Is that directory a mount point, or 272 * 2. Is that directory a mount point, or
273 * 3. Is that directory the root of an exported file system? 273 * 3. Is that directory the root of an exported file system?
274 */ 274 */
275 error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb); 275 error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
276 276
277 path_put(&path); 277 path_put(&path);
278 return error; 278 return error;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c763de5c1157..68454e75fce9 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -59,7 +59,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
59 * the write call). 59 * the write call).
60 */ 60 */
61static inline __be32 61static inline __be32
62nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int requested) 62nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, umode_t requested)
63{ 63{
64 mode &= S_IFMT; 64 mode &= S_IFMT;
65 65
@@ -293,7 +293,7 @@ out:
293 * include/linux/nfsd/nfsd.h. 293 * include/linux/nfsd/nfsd.h.
294 */ 294 */
295__be32 295__be32
296fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) 296fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
297{ 297{
298 struct svc_export *exp; 298 struct svc_export *exp;
299 struct dentry *dentry; 299 struct dentry *dentry;
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index c16f8d8331b5..e5e6707ba687 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -102,7 +102,7 @@ extern char * SVCFH_fmt(struct svc_fh *fhp);
102/* 102/*
103 * Function prototypes 103 * Function prototypes
104 */ 104 */
105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int); 105__be32 fh_verify(struct svc_rqst *, struct svc_fh *, umode_t, int);
106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); 106__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
107__be32 fh_update(struct svc_fh *); 107__be32 fh_update(struct svc_fh *);
108void fh_put(struct svc_fh *); 108void fh_put(struct svc_fh *);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 7a2e442623c8..d25a723b68ad 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -307,7 +307,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
307 struct dentry *dentry; 307 struct dentry *dentry;
308 struct inode *inode; 308 struct inode *inode;
309 int accmode = NFSD_MAY_SATTR; 309 int accmode = NFSD_MAY_SATTR;
310 int ftype = 0; 310 umode_t ftype = 0;
311 __be32 err; 311 __be32 err;
312 int host_err; 312 int host_err;
313 int size_change = 0; 313 int size_change = 0;
@@ -730,7 +730,7 @@ static int nfsd_open_break_lease(struct inode *inode, int access)
730 * N.B. After this call fhp needs an fh_put 730 * N.B. After this call fhp needs an fh_put
731 */ 731 */
732__be32 732__be32
733nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, 733nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
734 int access, struct file **filp) 734 int access, struct file **filp)
735{ 735{
736 struct dentry *dentry; 736 struct dentry *dentry;
@@ -1300,7 +1300,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1300 goto out; 1300 goto out;
1301 } 1301 }
1302 1302
1303 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1303 host_err = fh_want_write(fhp);
1304 if (host_err) 1304 if (host_err)
1305 goto out_nfserr; 1305 goto out_nfserr;
1306 1306
@@ -1325,7 +1325,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1325 break; 1325 break;
1326 } 1326 }
1327 if (host_err < 0) { 1327 if (host_err < 0) {
1328 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1328 fh_drop_write(fhp);
1329 goto out_nfserr; 1329 goto out_nfserr;
1330 } 1330 }
1331 1331
@@ -1339,7 +1339,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1339 err2 = nfserrno(commit_metadata(fhp)); 1339 err2 = nfserrno(commit_metadata(fhp));
1340 if (err2) 1340 if (err2)
1341 err = err2; 1341 err = err2;
1342 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1342 fh_drop_write(fhp);
1343 /* 1343 /*
1344 * Update the file handle to get the new inode info. 1344 * Update the file handle to get the new inode info.
1345 */ 1345 */
@@ -1430,7 +1430,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1430 v_atime = verifier[1]&0x7fffffff; 1430 v_atime = verifier[1]&0x7fffffff;
1431 } 1431 }
1432 1432
1433 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1433 host_err = fh_want_write(fhp);
1434 if (host_err) 1434 if (host_err)
1435 goto out_nfserr; 1435 goto out_nfserr;
1436 if (dchild->d_inode) { 1436 if (dchild->d_inode) {
@@ -1469,13 +1469,13 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1469 case NFS3_CREATE_GUARDED: 1469 case NFS3_CREATE_GUARDED:
1470 err = nfserr_exist; 1470 err = nfserr_exist;
1471 } 1471 }
1472 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1472 fh_drop_write(fhp);
1473 goto out; 1473 goto out;
1474 } 1474 }
1475 1475
1476 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1476 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1477 if (host_err < 0) { 1477 if (host_err < 0) {
1478 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1478 fh_drop_write(fhp);
1479 goto out_nfserr; 1479 goto out_nfserr;
1480 } 1480 }
1481 if (created) 1481 if (created)
@@ -1503,7 +1503,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1503 if (!err) 1503 if (!err)
1504 err = nfserrno(commit_metadata(fhp)); 1504 err = nfserrno(commit_metadata(fhp));
1505 1505
1506 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1506 fh_drop_write(fhp);
1507 /* 1507 /*
1508 * Update the filehandle to get the new inode info. 1508 * Update the filehandle to get the new inode info.
1509 */ 1509 */
@@ -1600,7 +1600,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1600 if (IS_ERR(dnew)) 1600 if (IS_ERR(dnew))
1601 goto out_nfserr; 1601 goto out_nfserr;
1602 1602
1603 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1603 host_err = fh_want_write(fhp);
1604 if (host_err) 1604 if (host_err)
1605 goto out_nfserr; 1605 goto out_nfserr;
1606 1606
@@ -1621,7 +1621,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1621 err = nfserrno(commit_metadata(fhp)); 1621 err = nfserrno(commit_metadata(fhp));
1622 fh_unlock(fhp); 1622 fh_unlock(fhp);
1623 1623
1624 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1624 fh_drop_write(fhp);
1625 1625
1626 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); 1626 cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
1627 dput(dnew); 1627 dput(dnew);
@@ -1674,7 +1674,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1674 1674
1675 dold = tfhp->fh_dentry; 1675 dold = tfhp->fh_dentry;
1676 1676
1677 host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt); 1677 host_err = fh_want_write(tfhp);
1678 if (host_err) { 1678 if (host_err) {
1679 err = nfserrno(host_err); 1679 err = nfserrno(host_err);
1680 goto out_dput; 1680 goto out_dput;
@@ -1699,7 +1699,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1699 err = nfserrno(host_err); 1699 err = nfserrno(host_err);
1700 } 1700 }
1701out_drop_write: 1701out_drop_write:
1702 mnt_drop_write(tfhp->fh_export->ex_path.mnt); 1702 fh_drop_write(tfhp);
1703out_dput: 1703out_dput:
1704 dput(dnew); 1704 dput(dnew);
1705out_unlock: 1705out_unlock:
@@ -1776,7 +1776,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1776 host_err = -EXDEV; 1776 host_err = -EXDEV;
1777 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt) 1777 if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
1778 goto out_dput_new; 1778 goto out_dput_new;
1779 host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt); 1779 host_err = fh_want_write(ffhp);
1780 if (host_err) 1780 if (host_err)
1781 goto out_dput_new; 1781 goto out_dput_new;
1782 1782
@@ -1795,7 +1795,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1795 host_err = commit_metadata(ffhp); 1795 host_err = commit_metadata(ffhp);
1796 } 1796 }
1797out_drop_write: 1797out_drop_write:
1798 mnt_drop_write(ffhp->fh_export->ex_path.mnt); 1798 fh_drop_write(ffhp);
1799 out_dput_new: 1799 out_dput_new:
1800 dput(ndentry); 1800 dput(ndentry);
1801 out_dput_old: 1801 out_dput_old:
@@ -1854,7 +1854,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1854 if (!type) 1854 if (!type)
1855 type = rdentry->d_inode->i_mode & S_IFMT; 1855 type = rdentry->d_inode->i_mode & S_IFMT;
1856 1856
1857 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1857 host_err = fh_want_write(fhp);
1858 if (host_err) 1858 if (host_err)
1859 goto out_put; 1859 goto out_put;
1860 1860
@@ -1868,7 +1868,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1868 if (!host_err) 1868 if (!host_err)
1869 host_err = commit_metadata(fhp); 1869 host_err = commit_metadata(fhp);
1870out_drop_write: 1870out_drop_write:
1871 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1871 fh_drop_write(fhp);
1872out_put: 1872out_put:
1873 dput(rdentry); 1873 dput(rdentry);
1874 1874
@@ -2270,7 +2270,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2270 } else 2270 } else
2271 size = 0; 2271 size = 0;
2272 2272
2273 error = mnt_want_write(fhp->fh_export->ex_path.mnt); 2273 error = fh_want_write(fhp);
2274 if (error) 2274 if (error)
2275 goto getout; 2275 goto getout;
2276 if (size) 2276 if (size)
@@ -2284,7 +2284,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
2284 error = 0; 2284 error = 0;
2285 } 2285 }
2286 } 2286 }
2287 mnt_drop_write(fhp->fh_export->ex_path.mnt); 2287 fh_drop_write(fhp);
2288 2288
2289getout: 2289getout:
2290 kfree(value); 2290 kfree(value);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 3f54ad03bb2b..1dcd238e11a0 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -66,7 +66,7 @@ __be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
66__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, 66__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
67 loff_t, unsigned long); 67 loff_t, unsigned long);
68#endif /* CONFIG_NFSD_V3 */ 68#endif /* CONFIG_NFSD_V3 */
69__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, 69__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
70 int, struct file **); 70 int, struct file **);
71void nfsd_close(struct file *); 71void nfsd_close(struct file *);
72__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, 72__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
@@ -106,4 +106,14 @@ struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
106int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); 106int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
107#endif 107#endif
108 108
109static inline int fh_want_write(struct svc_fh *fh)
110{
111 return mnt_want_write(fh->fh_export->ex_path.mnt);
112}
113
114static inline void fh_drop_write(struct svc_fh *fh)
115{
116 mnt_drop_write(fh->fh_export->ex_path.mnt);
117}
118
109#endif /* LINUX_NFSD_VFS_H */ 119#endif /* LINUX_NFSD_VFS_H */
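
The vfs.h hunk adds the fh_want_write()/fh_drop_write() wrappers used by the converted nfsd call sites above. A usage sketch, assuming fs/nfsd's internal headers and a hypothetical caller; only the wrappers, nfserrno() and the want/drop ordering are taken from the diff:

    #include "nfsd.h"
    #include "nfsfh.h"
    #include "vfs.h"

    /* Hypothetical nfsd operation guarded by the new file-handle wrappers. */
    static __be32 example_modify(struct svc_rqst *rqstp, struct svc_fh *fhp)
    {
            int host_err = fh_want_write(fhp);      /* was mnt_want_write(fhp->fh_export->ex_path.mnt) */

            if (host_err)
                    return nfserrno(host_err);

            /* ... create/rename/unlink through fhp here ... */

            fh_drop_write(fhp);
            return nfs_ok;
    }
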
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 3a1923943b14..ca35b3a46d17 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -251,7 +251,7 @@ nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
251 251
252static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) 252static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode)
253{ 253{
254 mode_t mode = inode->i_mode; 254 umode_t mode = inode->i_mode;
255 255
256 de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 256 de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
257} 257}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b50ffb72e5b3..8f7b95ac1f7e 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -291,7 +291,7 @@ const struct address_space_operations nilfs_aops = {
291 .is_partially_uptodate = block_is_partially_uptodate, 291 .is_partially_uptodate = block_is_partially_uptodate,
292}; 292};
293 293
294struct inode *nilfs_new_inode(struct inode *dir, int mode) 294struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
295{ 295{
296 struct super_block *sb = dir->i_sb; 296 struct super_block *sb = dir->i_sb;
297 struct the_nilfs *nilfs = sb->s_fs_info; 297 struct the_nilfs *nilfs = sb->s_fs_info;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 41d6743d303c..886649627c3d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -27,7 +27,7 @@
27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ 27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/compat.h> /* compat_ptr() */ 29#include <linux/compat.h> /* compat_ptr() */
30#include <linux/mount.h> /* mnt_want_write(), mnt_drop_write() */ 30#include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */
31#include <linux/buffer_head.h> 31#include <linux/buffer_head.h>
32#include <linux/nilfs2_fs.h> 32#include <linux/nilfs2_fs.h>
33#include "nilfs.h" 33#include "nilfs.h"
@@ -119,7 +119,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
119 if (get_user(flags, (int __user *)argp)) 119 if (get_user(flags, (int __user *)argp))
120 return -EFAULT; 120 return -EFAULT;
121 121
122 ret = mnt_want_write(filp->f_path.mnt); 122 ret = mnt_want_write_file(filp);
123 if (ret) 123 if (ret)
124 return ret; 124 return ret;
125 125
@@ -154,7 +154,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
154 ret = nilfs_transaction_commit(inode->i_sb); 154 ret = nilfs_transaction_commit(inode->i_sb);
155out: 155out:
156 mutex_unlock(&inode->i_mutex); 156 mutex_unlock(&inode->i_mutex);
157 mnt_drop_write(filp->f_path.mnt); 157 mnt_drop_write_file(filp);
158 return ret; 158 return ret;
159} 159}
160 160
@@ -174,7 +174,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
174 if (!capable(CAP_SYS_ADMIN)) 174 if (!capable(CAP_SYS_ADMIN))
175 return -EPERM; 175 return -EPERM;
176 176
177 ret = mnt_want_write(filp->f_path.mnt); 177 ret = mnt_want_write_file(filp);
178 if (ret) 178 if (ret)
179 return ret; 179 return ret;
180 180
@@ -194,7 +194,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
194 194
195 up_read(&inode->i_sb->s_umount); 195 up_read(&inode->i_sb->s_umount);
196out: 196out:
197 mnt_drop_write(filp->f_path.mnt); 197 mnt_drop_write_file(filp);
198 return ret; 198 return ret;
199} 199}
200 200
@@ -210,7 +210,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
210 if (!capable(CAP_SYS_ADMIN)) 210 if (!capable(CAP_SYS_ADMIN))
211 return -EPERM; 211 return -EPERM;
212 212
213 ret = mnt_want_write(filp->f_path.mnt); 213 ret = mnt_want_write_file(filp);
214 if (ret) 214 if (ret)
215 return ret; 215 return ret;
216 216
@@ -225,7 +225,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
225 else 225 else
226 nilfs_transaction_commit(inode->i_sb); /* never fails */ 226 nilfs_transaction_commit(inode->i_sb); /* never fails */
227out: 227out:
228 mnt_drop_write(filp->f_path.mnt); 228 mnt_drop_write_file(filp);
229 return ret; 229 return ret;
230} 230}
231 231
@@ -591,7 +591,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
591 if (!capable(CAP_SYS_ADMIN)) 591 if (!capable(CAP_SYS_ADMIN))
592 return -EPERM; 592 return -EPERM;
593 593
594 ret = mnt_want_write(filp->f_path.mnt); 594 ret = mnt_want_write_file(filp);
595 if (ret) 595 if (ret)
596 return ret; 596 return ret;
597 597
@@ -625,6 +625,9 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
625 if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment) 625 if (argv[n].v_nmembs > nsegs * nilfs->ns_blocks_per_segment)
626 goto out_free; 626 goto out_free;
627 627
628 if (argv[n].v_nmembs >= UINT_MAX / argv[n].v_size)
629 goto out_free;
630
628 len = argv[n].v_size * argv[n].v_nmembs; 631 len = argv[n].v_size * argv[n].v_nmembs;
629 base = (void __user *)(unsigned long)argv[n].v_base; 632 base = (void __user *)(unsigned long)argv[n].v_base;
630 if (len == 0) { 633 if (len == 0) {
@@ -672,7 +675,7 @@ out_free:
672 vfree(kbufs[n]); 675 vfree(kbufs[n]);
673 kfree(kbufs[4]); 676 kfree(kbufs[4]);
674out: 677out:
675 mnt_drop_write(filp->f_path.mnt); 678 mnt_drop_write_file(filp);
676 return ret; 679 return ret;
677} 680}
678 681
@@ -707,7 +710,7 @@ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
707 if (!capable(CAP_SYS_ADMIN)) 710 if (!capable(CAP_SYS_ADMIN))
708 goto out; 711 goto out;
709 712
710 ret = mnt_want_write(filp->f_path.mnt); 713 ret = mnt_want_write_file(filp);
711 if (ret) 714 if (ret)
712 goto out; 715 goto out;
713 716
@@ -718,7 +721,7 @@ static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
718 ret = nilfs_resize_fs(inode->i_sb, newsize); 721 ret = nilfs_resize_fs(inode->i_sb, newsize);
719 722
720out_drop_write: 723out_drop_write:
721 mnt_drop_write(filp->f_path.mnt); 724 mnt_drop_write_file(filp);
722out: 725out:
723 return ret; 726 return ret;
724} 727}
@@ -842,6 +845,19 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
842 case FS_IOC32_GETVERSION: 845 case FS_IOC32_GETVERSION:
843 cmd = FS_IOC_GETVERSION; 846 cmd = FS_IOC_GETVERSION;
844 break; 847 break;
848 case NILFS_IOCTL_CHANGE_CPMODE:
849 case NILFS_IOCTL_DELETE_CHECKPOINT:
850 case NILFS_IOCTL_GET_CPINFO:
851 case NILFS_IOCTL_GET_CPSTAT:
852 case NILFS_IOCTL_GET_SUINFO:
853 case NILFS_IOCTL_GET_SUSTAT:
854 case NILFS_IOCTL_GET_VINFO:
855 case NILFS_IOCTL_GET_BDESCS:
856 case NILFS_IOCTL_CLEAN_SEGMENTS:
857 case NILFS_IOCTL_SYNC:
858 case NILFS_IOCTL_RESIZE:
859 case NILFS_IOCTL_SET_ALLOC_RANGE:
860 break;
845 default: 861 default:
846 return -ENOIOCTLCMD; 862 return -ENOIOCTLCMD;
847 } 863 }
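
The clean_segments hunk above adds a guard so that v_size * v_nmembs cannot wrap around before being used as an allocation length. A small sketch of the same check under hypothetical names; the extra "size &&" test is an added precaution, not part of the hunk:

    #include <linux/errno.h>
    #include <linux/kernel.h>   /* UINT_MAX */

    /* Hypothetical bounds check: reject the request before size * nmembs
     * can overflow, instead of allocating from a truncated length. */
    static int example_check_len(unsigned int size, unsigned int nmembs,
                                 unsigned int *len)
    {
            if (size && nmembs >= UINT_MAX / size)
                    return -EINVAL;
            *len = size * nmembs;
            return 0;
    }
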
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 768982de10e4..1cd3f624dffc 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -84,7 +84,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
84 * If the create succeeds, we fill in the inode information 84 * If the create succeeds, we fill in the inode information
85 * with d_instantiate(). 85 * with d_instantiate().
86 */ 86 */
87static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, 87static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
88 struct nameidata *nd) 88 struct nameidata *nd)
89{ 89{
90 struct inode *inode; 90 struct inode *inode;
@@ -112,7 +112,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode,
112} 112}
113 113
114static int 114static int
115nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) 115nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
116{ 116{
117 struct inode *inode; 117 struct inode *inode;
118 struct nilfs_transaction_info ti; 118 struct nilfs_transaction_info ti;
@@ -213,7 +213,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
213 return err; 213 return err;
214} 214}
215 215
216static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 216static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
217{ 217{
218 struct inode *inode; 218 struct inode *inode;
219 struct nilfs_transaction_info ti; 219 struct nilfs_transaction_info ti;
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 3777d138f895..250add84da76 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -246,7 +246,7 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
246/* inode.c */ 246/* inode.c */
247void nilfs_inode_add_blocks(struct inode *inode, int n); 247void nilfs_inode_add_blocks(struct inode *inode, int n);
248void nilfs_inode_sub_blocks(struct inode *inode, int n); 248void nilfs_inode_sub_blocks(struct inode *inode, int n);
249extern struct inode *nilfs_new_inode(struct inode *, int); 249extern struct inode *nilfs_new_inode(struct inode *, umode_t);
250extern void nilfs_free_inode(struct inode *); 250extern void nilfs_free_inode(struct inode *);
251extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 251extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
252extern void nilfs_set_inode_flags(struct inode *); 252extern void nilfs_set_inode_flags(struct inode *);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index bb24ab6c282f..0e72ad6f22aa 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2470,7 +2470,7 @@ static int nilfs_segctor_thread(void *arg)
2470 2470
2471 if (freezing(current)) { 2471 if (freezing(current)) {
2472 spin_unlock(&sci->sc_state_lock); 2472 spin_unlock(&sci->sc_state_lock);
2473 refrigerator(); 2473 try_to_freeze();
2474 spin_lock(&sci->sc_state_lock); 2474 spin_lock(&sci->sc_state_lock);
2475 } else { 2475 } else {
2476 DEFINE_WAIT(wait); 2476 DEFINE_WAIT(wait);
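
The segment.c hunk replaces the old refrigerator() call with try_to_freeze() in the segctor thread loop. A hedged sketch of the updated kthread idiom, with a hypothetical daemon body:

    #include <linux/freezer.h>
    #include <linux/jiffies.h>
    #include <linux/kthread.h>
    #include <linux/sched.h>

    /* Hypothetical daemon loop: check freezing() and call try_to_freeze()
     * instead of refrigerator(), then go back to waiting for work. */
    static int example_thread(void *arg)
    {
            set_freezable();
            while (!kthread_should_stop()) {
                    if (freezing(current)) {
                            try_to_freeze();
                            continue;
                    }
                    /* ... do one unit of work, or sleep waiting for it ... */
                    schedule_timeout_interruptible(HZ);
            }
            return 0;
    }
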
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 8351c44a7320..08e3d4f9df18 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,8 +175,6 @@ static void nilfs_i_callback(struct rcu_head *head)
175 struct inode *inode = container_of(head, struct inode, i_rcu); 175 struct inode *inode = container_of(head, struct inode, i_rcu);
176 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 176 struct nilfs_mdt_info *mdi = NILFS_MDT(inode);
177 177
178 INIT_LIST_HEAD(&inode->i_dentry);
179
180 if (mdi) { 178 if (mdi) {
181 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 179 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */
182 kfree(mdi); 180 kfree(mdi);
@@ -650,11 +648,11 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
650 return 0; 648 return 0;
651} 649}
652 650
653static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 651static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
654{ 652{
655 struct super_block *sb = vfs->mnt_sb; 653 struct super_block *sb = dentry->d_sb;
656 struct the_nilfs *nilfs = sb->s_fs_info; 654 struct the_nilfs *nilfs = sb->s_fs_info;
657 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; 655 struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
658 656
659 if (!nilfs_test_opt(nilfs, BARRIER)) 657 if (!nilfs_test_opt(nilfs, BARRIER))
660 seq_puts(seq, ",nobarrier"); 658 seq_puts(seq, ",nobarrier");
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9fde1c00a296..3568c8a8b138 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,8 @@
16 16
17#include <asm/ioctls.h> 17#include <asm/ioctls.h>
18 18
19#include "../../mount.h"
20
19#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 21#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
20#define FANOTIFY_DEFAULT_MAX_MARKS 8192 22#define FANOTIFY_DEFAULT_MAX_MARKS 8192
21#define FANOTIFY_DEFAULT_MAX_LISTENERS 128 23#define FANOTIFY_DEFAULT_MAX_LISTENERS 128
@@ -546,7 +548,7 @@ static int fanotify_remove_vfsmount_mark(struct fsnotify_group *group,
546 548
547 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags); 549 removed = fanotify_mark_remove_from_mask(fsn_mark, mask, flags);
548 fsnotify_put_mark(fsn_mark); 550 fsnotify_put_mark(fsn_mark);
549 if (removed & mnt->mnt_fsnotify_mask) 551 if (removed & real_mount(mnt)->mnt_fsnotify_mask)
550 fsnotify_recalc_vfsmount_mask(mnt); 552 fsnotify_recalc_vfsmount_mask(mnt);
551 553
552 return 0; 554 return 0;
@@ -623,7 +625,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
623 } 625 }
624 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags); 626 added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
625 627
626 if (added & ~mnt->mnt_fsnotify_mask) 628 if (added & ~real_mount(mnt)->mnt_fsnotify_mask)
627 fsnotify_recalc_vfsmount_mask(mnt); 629 fsnotify_recalc_vfsmount_mask(mnt);
628err: 630err:
629 fsnotify_put_mark(fsn_mark); 631 fsnotify_put_mark(fsn_mark);
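[Editorial sketch] mnt_fsnotify_mask has moved off the public struct vfsmount onto the fs-private struct mount, so code holding only a vfsmount converts it with real_mount() from the internal fs/mount.h header included above. A hedged sketch of how that helper is commonly defined — struct mount embeds the public vfsmount as its .mnt member; the real structure has many more fields:

	/* Assumed shape of fs/mount.h at this point in the series. */
	struct mount {
		/* ... */
		struct vfsmount mnt;        /* the exported, public part */
		__u32 mnt_fsnotify_mask;    /* now private to fs/ */
		/* ... */
	};

	static inline struct mount *real_mount(struct vfsmount *mnt)
	{
		return container_of(mnt, struct mount, mnt);
	}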
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 79b47cbb5cd8..ccb14d3fc0de 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -26,6 +26,7 @@
26 26
27#include <linux/fsnotify_backend.h> 27#include <linux/fsnotify_backend.h>
28#include "fsnotify.h" 28#include "fsnotify.h"
29#include "../mount.h"
29 30
30/* 31/*
31 * Clear all of the marks on an inode when it is being evicted from core 32 * Clear all of the marks on an inode when it is being evicted from core
@@ -205,13 +206,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
205 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; 206 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
206 struct fsnotify_group *inode_group, *vfsmount_group; 207 struct fsnotify_group *inode_group, *vfsmount_group;
207 struct fsnotify_event *event = NULL; 208 struct fsnotify_event *event = NULL;
208 struct vfsmount *mnt; 209 struct mount *mnt;
209 int idx, ret = 0; 210 int idx, ret = 0;
210 /* global tests shouldn't care about events on child only the specific event */ 211 /* global tests shouldn't care about events on child only the specific event */
211 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); 212 __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
212 213
213 if (data_is == FSNOTIFY_EVENT_PATH) 214 if (data_is == FSNOTIFY_EVENT_PATH)
214 mnt = ((struct path *)data)->mnt; 215 mnt = real_mount(((struct path *)data)->mnt);
215 else 216 else
216 mnt = NULL; 217 mnt = NULL;
217 218
@@ -262,11 +263,11 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
262 /* we didn't use the vfsmount_mark */ 263 /* we didn't use the vfsmount_mark */
263 vfsmount_group = NULL; 264 vfsmount_group = NULL;
264 } else if (vfsmount_group > inode_group) { 265 } else if (vfsmount_group > inode_group) {
265 ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data, 266 ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data,
266 data_is, cookie, file_name, &event); 267 data_is, cookie, file_name, &event);
267 inode_group = NULL; 268 inode_group = NULL;
268 } else { 269 } else {
269 ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark, 270 ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark,
270 mask, data, data_is, cookie, file_name, 271 mask, data, data_is, cookie, file_name,
271 &event); 272 &event);
272 } 273 }
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 778fe6cae3b0..b7b4b0e8554f 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -28,15 +28,17 @@
28 28
29#include <linux/fsnotify_backend.h> 29#include <linux/fsnotify_backend.h>
30#include "fsnotify.h" 30#include "fsnotify.h"
31#include "../mount.h"
31 32
32void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) 33void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
33{ 34{
34 struct fsnotify_mark *mark, *lmark; 35 struct fsnotify_mark *mark, *lmark;
35 struct hlist_node *pos, *n; 36 struct hlist_node *pos, *n;
37 struct mount *m = real_mount(mnt);
36 LIST_HEAD(free_list); 38 LIST_HEAD(free_list);
37 39
38 spin_lock(&mnt->mnt_root->d_lock); 40 spin_lock(&mnt->mnt_root->d_lock);
39 hlist_for_each_entry_safe(mark, pos, n, &mnt->mnt_fsnotify_marks, m.m_list) { 41 hlist_for_each_entry_safe(mark, pos, n, &m->mnt_fsnotify_marks, m.m_list) {
40 list_add(&mark->m.free_m_list, &free_list); 42 list_add(&mark->m.free_m_list, &free_list);
41 hlist_del_init_rcu(&mark->m.m_list); 43 hlist_del_init_rcu(&mark->m.m_list);
42 fsnotify_get_mark(mark); 44 fsnotify_get_mark(mark);
@@ -59,15 +61,16 @@ void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group)
59 */ 61 */
60static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt) 62static void fsnotify_recalc_vfsmount_mask_locked(struct vfsmount *mnt)
61{ 63{
64 struct mount *m = real_mount(mnt);
62 struct fsnotify_mark *mark; 65 struct fsnotify_mark *mark;
63 struct hlist_node *pos; 66 struct hlist_node *pos;
64 __u32 new_mask = 0; 67 __u32 new_mask = 0;
65 68
66 assert_spin_locked(&mnt->mnt_root->d_lock); 69 assert_spin_locked(&mnt->mnt_root->d_lock);
67 70
68 hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) 71 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list)
69 new_mask |= mark->mask; 72 new_mask |= mark->mask;
70 mnt->mnt_fsnotify_mask = new_mask; 73 m->mnt_fsnotify_mask = new_mask;
71} 74}
72 75
73/* 76/*
@@ -101,12 +104,13 @@ void fsnotify_destroy_vfsmount_mark(struct fsnotify_mark *mark)
101static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group, 104static struct fsnotify_mark *fsnotify_find_vfsmount_mark_locked(struct fsnotify_group *group,
102 struct vfsmount *mnt) 105 struct vfsmount *mnt)
103{ 106{
107 struct mount *m = real_mount(mnt);
104 struct fsnotify_mark *mark; 108 struct fsnotify_mark *mark;
105 struct hlist_node *pos; 109 struct hlist_node *pos;
106 110
107 assert_spin_locked(&mnt->mnt_root->d_lock); 111 assert_spin_locked(&mnt->mnt_root->d_lock);
108 112
109 hlist_for_each_entry(mark, pos, &mnt->mnt_fsnotify_marks, m.m_list) { 113 hlist_for_each_entry(mark, pos, &m->mnt_fsnotify_marks, m.m_list) {
110 if (mark->group == group) { 114 if (mark->group == group) {
111 fsnotify_get_mark(mark); 115 fsnotify_get_mark(mark);
112 return mark; 116 return mark;
@@ -140,6 +144,7 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
140 struct fsnotify_group *group, struct vfsmount *mnt, 144 struct fsnotify_group *group, struct vfsmount *mnt,
141 int allow_dups) 145 int allow_dups)
142{ 146{
147 struct mount *m = real_mount(mnt);
143 struct fsnotify_mark *lmark; 148 struct fsnotify_mark *lmark;
144 struct hlist_node *node, *last = NULL; 149 struct hlist_node *node, *last = NULL;
145 int ret = 0; 150 int ret = 0;
@@ -154,13 +159,13 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
154 mark->m.mnt = mnt; 159 mark->m.mnt = mnt;
155 160
156 /* is mark the first mark? */ 161 /* is mark the first mark? */
157 if (hlist_empty(&mnt->mnt_fsnotify_marks)) { 162 if (hlist_empty(&m->mnt_fsnotify_marks)) {
158 hlist_add_head_rcu(&mark->m.m_list, &mnt->mnt_fsnotify_marks); 163 hlist_add_head_rcu(&mark->m.m_list, &m->mnt_fsnotify_marks);
159 goto out; 164 goto out;
160 } 165 }
161 166
162 /* should mark be in the middle of the current list? */ 167 /* should mark be in the middle of the current list? */
163 hlist_for_each_entry(lmark, node, &mnt->mnt_fsnotify_marks, m.m_list) { 168 hlist_for_each_entry(lmark, node, &m->mnt_fsnotify_marks, m.m_list) {
164 last = node; 169 last = node;
165 170
166 if ((lmark->group == group) && !allow_dups) { 171 if ((lmark->group == group) && !allow_dups) {
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 97e2dacbc867..2eaa66652944 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -335,7 +335,6 @@ struct inode *ntfs_alloc_big_inode(struct super_block *sb)
335static void ntfs_i_callback(struct rcu_head *head) 335static void ntfs_i_callback(struct rcu_head *head)
336{ 336{
337 struct inode *inode = container_of(head, struct inode, i_rcu); 337 struct inode *inode = container_of(head, struct inode, i_rcu);
338 INIT_LIST_HEAD(&inode->i_dentry);
339 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); 338 kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode));
340} 339}
341 340
@@ -2301,16 +2300,16 @@ void ntfs_evict_big_inode(struct inode *vi)
2301/** 2300/**
2302 * ntfs_show_options - show mount options in /proc/mounts 2301 * ntfs_show_options - show mount options in /proc/mounts
2303 * @sf: seq_file in which to write our mount options 2302 * @sf: seq_file in which to write our mount options
2304 * @mnt: vfs mount whose mount options to display 2303 * @root: root of the mounted tree whose mount options to display
2305 * 2304 *
2306 * Called by the VFS once for each mounted ntfs volume when someone reads 2305 * Called by the VFS once for each mounted ntfs volume when someone reads
2307 * /proc/mounts in order to display the NTFS specific mount options of each 2306 * /proc/mounts in order to display the NTFS specific mount options of each
2308 * mount. The mount options of the vfs mount @mnt are written to the seq file 2307 * mount. The mount options of fs specified by @root are written to the seq file
2309 * @sf and success is returned. 2308 * @sf and success is returned.
2310 */ 2309 */
2311int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) 2310int ntfs_show_options(struct seq_file *sf, struct dentry *root)
2312{ 2311{
2313 ntfs_volume *vol = NTFS_SB(mnt->mnt_sb); 2312 ntfs_volume *vol = NTFS_SB(root->d_sb);
2314 int i; 2313 int i;
2315 2314
2316 seq_printf(sf, ",uid=%i", vol->uid); 2315 seq_printf(sf, ",uid=%i", vol->uid);
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index fe8e7e928889..db29695f845c 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -298,7 +298,7 @@ extern void ntfs_clear_extent_inode(ntfs_inode *ni);
298 298
299extern int ntfs_read_inode_mount(struct inode *vi); 299extern int ntfs_read_inode_mount(struct inode *vi);
300 300
301extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt); 301extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
302 302
303#ifdef NTFS_RW 303#ifdef NTFS_RW
304 304
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index b52706da4645..608be4516091 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -104,7 +104,7 @@ static bool parse_options(ntfs_volume *vol, char *opt)
104 int errors = 0, sloppy = 0; 104 int errors = 0, sloppy = 0;
105 uid_t uid = (uid_t)-1; 105 uid_t uid = (uid_t)-1;
106 gid_t gid = (gid_t)-1; 106 gid_t gid = (gid_t)-1;
107 mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; 107 umode_t fmask = (umode_t)-1, dmask = (umode_t)-1;
108 int mft_zone_multiplier = -1, on_errors = -1; 108 int mft_zone_multiplier = -1, on_errors = -1;
109 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1; 109 int show_sys_files = -1, case_sensitive = -1, disable_sparse = -1;
110 struct nls_table *nls_map = NULL, *old_nls; 110 struct nls_table *nls_map = NULL, *old_nls;
@@ -287,9 +287,9 @@ no_mount_options:
287 vol->uid = uid; 287 vol->uid = uid;
288 if (gid != (gid_t)-1) 288 if (gid != (gid_t)-1)
289 vol->gid = gid; 289 vol->gid = gid;
290 if (fmask != (mode_t)-1) 290 if (fmask != (umode_t)-1)
291 vol->fmask = fmask; 291 vol->fmask = fmask;
292 if (dmask != (mode_t)-1) 292 if (dmask != (umode_t)-1)
293 vol->dmask = dmask; 293 vol->dmask = dmask;
294 if (show_sys_files != -1) { 294 if (show_sys_files != -1) {
295 if (show_sys_files) 295 if (show_sys_files)
diff --git a/fs/ntfs/volume.h b/fs/ntfs/volume.h
index 406ab55dfb32..15e3ba8d521a 100644
--- a/fs/ntfs/volume.h
+++ b/fs/ntfs/volume.h
@@ -48,8 +48,8 @@ typedef struct {
48 unsigned long flags; /* Miscellaneous flags, see below. */ 48 unsigned long flags; /* Miscellaneous flags, see below. */
49 uid_t uid; /* uid that files will be mounted as. */ 49 uid_t uid; /* uid that files will be mounted as. */
50 gid_t gid; /* gid that files will be mounted as. */ 50 gid_t gid; /* gid that files will be mounted as. */
51 mode_t fmask; /* The mask for file permissions. */ 51 umode_t fmask; /* The mask for file permissions. */
52 mode_t dmask; /* The mask for directory 52 umode_t dmask; /* The mask for directory
53 permissions. */ 53 permissions. */
54 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */ 54 u8 mft_zone_multiplier; /* Initial mft zone multiplier. */
55 u8 on_errors; /* What to do on filesystem errors. */ 55 u8 on_errors; /* What to do on filesystem errors. */
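[Editorial sketch] The ntfs fmask/dmask options and volume fields become umode_t, and the "not specified" sentinel changes type with them. A tiny illustrative sketch of the sentinel pattern the hunks preserve (hypothetical parser variables, not ntfs code):

	/* umode_t is an unsigned type, so (umode_t)-1 remains a safe sentinel. */
	umode_t fmask = (umode_t)-1;

	/* ... option parsing may assign fmask ... */

	if (fmask != (umode_t)-1)
		vol_fmask = fmask;          /* only override when the user set it */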
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c
index dc45deb19e68..73ba81928bce 100644
--- a/fs/ocfs2/cluster/netdebug.c
+++ b/fs/ocfs2/cluster/netdebug.c
@@ -553,7 +553,7 @@ void o2net_debugfs_exit(void)
553 553
554int o2net_debugfs_init(void) 554int o2net_debugfs_init(void)
555{ 555{
556 mode_t mode = S_IFREG|S_IRUSR; 556 umode_t mode = S_IFREG|S_IRUSR;
557 557
558 o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL); 558 o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
559 if (o2net_dentry) 559 if (o2net_dentry)
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index b42076797049..abfac0d7ae9c 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -354,7 +354,6 @@ static struct inode *dlmfs_alloc_inode(struct super_block *sb)
354static void dlmfs_i_callback(struct rcu_head *head) 354static void dlmfs_i_callback(struct rcu_head *head)
355{ 355{
356 struct inode *inode = container_of(head, struct inode, i_rcu); 356 struct inode *inode = container_of(head, struct inode, i_rcu);
357 INIT_LIST_HEAD(&inode->i_dentry);
358 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); 357 kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode));
359} 358}
360 359
@@ -401,16 +400,14 @@ static struct backing_dev_info dlmfs_backing_dev_info = {
401static struct inode *dlmfs_get_root_inode(struct super_block *sb) 400static struct inode *dlmfs_get_root_inode(struct super_block *sb)
402{ 401{
403 struct inode *inode = new_inode(sb); 402 struct inode *inode = new_inode(sb);
404 int mode = S_IFDIR | 0755; 403 umode_t mode = S_IFDIR | 0755;
405 struct dlmfs_inode_private *ip; 404 struct dlmfs_inode_private *ip;
406 405
407 if (inode) { 406 if (inode) {
408 ip = DLMFS_I(inode); 407 ip = DLMFS_I(inode);
409 408
410 inode->i_ino = get_next_ino(); 409 inode->i_ino = get_next_ino();
411 inode->i_mode = mode; 410 inode_init_owner(inode, NULL, mode);
412 inode->i_uid = current_fsuid();
413 inode->i_gid = current_fsgid();
414 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 411 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
415 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 412 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
416 inc_nlink(inode); 413 inc_nlink(inode);
@@ -424,7 +421,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
424 421
425static struct inode *dlmfs_get_inode(struct inode *parent, 422static struct inode *dlmfs_get_inode(struct inode *parent,
426 struct dentry *dentry, 423 struct dentry *dentry,
427 int mode) 424 umode_t mode)
428{ 425{
429 struct super_block *sb = parent->i_sb; 426 struct super_block *sb = parent->i_sb;
430 struct inode * inode = new_inode(sb); 427 struct inode * inode = new_inode(sb);
@@ -434,9 +431,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
434 return NULL; 431 return NULL;
435 432
436 inode->i_ino = get_next_ino(); 433 inode->i_ino = get_next_ino();
437 inode->i_mode = mode; 434 inode_init_owner(inode, parent, mode);
438 inode->i_uid = current_fsuid();
439 inode->i_gid = current_fsgid();
440 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; 435 inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
441 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 436 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
442 437
@@ -473,13 +468,6 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
473 inc_nlink(inode); 468 inc_nlink(inode);
474 break; 469 break;
475 } 470 }
476
477 if (parent->i_mode & S_ISGID) {
478 inode->i_gid = parent->i_gid;
479 if (S_ISDIR(mode))
480 inode->i_mode |= S_ISGID;
481 }
482
483 return inode; 471 return inode;
484} 472}
485 473
@@ -489,7 +477,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
489/* SMP-safe */ 477/* SMP-safe */
490static int dlmfs_mkdir(struct inode * dir, 478static int dlmfs_mkdir(struct inode * dir,
491 struct dentry * dentry, 479 struct dentry * dentry,
492 int mode) 480 umode_t mode)
493{ 481{
494 int status; 482 int status;
495 struct inode *inode = NULL; 483 struct inode *inode = NULL;
@@ -537,7 +525,7 @@ bail:
537 525
538static int dlmfs_create(struct inode *dir, 526static int dlmfs_create(struct inode *dir,
539 struct dentry *dentry, 527 struct dentry *dentry,
540 int mode, 528 umode_t mode,
541 struct nameidata *nd) 529 struct nameidata *nd)
542{ 530{
543 int status = 0; 531 int status = 0;
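[Editorial sketch] dlmfs drops its open-coded uid/gid assignment and the S_ISGID block further down in favour of inode_init_owner(), which subsumes both. A hedged sketch of that helper's behaviour as generally documented (approximation, not copied from fs/inode.c):

	/* Approximate behaviour of inode_init_owner(inode, dir, mode): */
	inode->i_uid = current_fsuid();
	if (dir && (dir->i_mode & S_ISGID)) {
		inode->i_gid = dir->i_gid;  /* inherit group from setgid parent */
		if (S_ISDIR(mode))
			mode |= S_ISGID;    /* new directories stay setgid */
	} else {
		inode->i_gid = current_fsgid();
	}
	inode->i_mode = mode;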
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6e396683c3d4..061591a3ab08 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2128,7 +2128,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
2128 * remove_suid() calls ->setattr without any hint that 2128 * remove_suid() calls ->setattr without any hint that
2129 * we may have already done our cluster locking. Since 2129 * we may have already done our cluster locking. Since
2130 * ocfs2_setattr() *must* take cluster locks to 2130 * ocfs2_setattr() *must* take cluster locks to
2131 * proceeed, this will lead us to recursively lock the 2131 * proceed, this will lead us to recursively lock the
2132 * inode. There's also the dinode i_size state which 2132 * inode. There's also the dinode i_size state which
2133 * can be lost via setattr during extending writes (we 2133 * can be lost via setattr during extending writes (we
2134 * set inode->i_size at the end of a write. */ 2134 * set inode->i_size at the end of a write. */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 726ff265b296..a6fda3c188aa 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -906,12 +906,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
906 if (get_user(flags, (int __user *) arg)) 906 if (get_user(flags, (int __user *) arg))
907 return -EFAULT; 907 return -EFAULT;
908 908
909 status = mnt_want_write(filp->f_path.mnt); 909 status = mnt_want_write_file(filp);
910 if (status) 910 if (status)
911 return status; 911 return status;
912 status = ocfs2_set_inode_attr(inode, flags, 912 status = ocfs2_set_inode_attr(inode, flags,
913 OCFS2_FL_MODIFIABLE); 913 OCFS2_FL_MODIFIABLE);
914 mnt_drop_write(filp->f_path.mnt); 914 mnt_drop_write_file(filp);
915 return status; 915 return status;
916 case OCFS2_IOC_RESVSP: 916 case OCFS2_IOC_RESVSP:
917 case OCFS2_IOC_RESVSP64: 917 case OCFS2_IOC_RESVSP64:
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 184c76b8c293..b1e3fce72ea4 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -1059,7 +1059,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
1059 struct ocfs2_move_extents range; 1059 struct ocfs2_move_extents range;
1060 struct ocfs2_move_extents_context *context = NULL; 1060 struct ocfs2_move_extents_context *context = NULL;
1061 1061
1062 status = mnt_want_write(filp->f_path.mnt); 1062 status = mnt_want_write_file(filp);
1063 if (status) 1063 if (status)
1064 return status; 1064 return status;
1065 1065
@@ -1145,7 +1145,7 @@ out:
1145 1145
1146 kfree(context); 1146 kfree(context);
1147 1147
1148 mnt_drop_write(filp->f_path.mnt); 1148 mnt_drop_write_file(filp);
1149 1149
1150 return status; 1150 return status;
1151} 1151}
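[Editorial sketch] Both ocfs2 ioctl paths above switch from mnt_want_write(filp->f_path.mnt) to the file-based helpers. A minimal sketch of the resulting pattern — a generic ioctl body; only the want/drop pairing mirrors the hunks:

	/* Take write access against the file's mount for the duration of a
	 * state-changing ioctl, releasing it on every exit path. */
	static long example_ioctl_setflags(struct file *filp, unsigned int flags)
	{
		int ret;

		ret = mnt_want_write_file(filp);    /* was mnt_want_write(filp->f_path.mnt) */
		if (ret)
			return ret;

		ret = 0; /* ... modify on-disk attributes here ... */

		mnt_drop_write_file(filp);          /* was mnt_drop_write(filp->f_path.mnt) */
		return ret;
	}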
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index a8b2bfea574e..be244692550d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -185,7 +185,7 @@ bail:
185 return ret; 185 return ret;
186} 186}
187 187
188static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode) 188static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
189{ 189{
190 struct inode *inode; 190 struct inode *inode;
191 191
@@ -207,7 +207,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, int mode)
207 207
208static int ocfs2_mknod(struct inode *dir, 208static int ocfs2_mknod(struct inode *dir,
209 struct dentry *dentry, 209 struct dentry *dentry,
210 int mode, 210 umode_t mode,
211 dev_t dev) 211 dev_t dev)
212{ 212{
213 int status = 0; 213 int status = 0;
@@ -602,7 +602,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
602 602
603static int ocfs2_mkdir(struct inode *dir, 603static int ocfs2_mkdir(struct inode *dir,
604 struct dentry *dentry, 604 struct dentry *dentry,
605 int mode) 605 umode_t mode)
606{ 606{
607 int ret; 607 int ret;
608 608
@@ -617,7 +617,7 @@ static int ocfs2_mkdir(struct inode *dir,
617 617
618static int ocfs2_create(struct inode *dir, 618static int ocfs2_create(struct inode *dir,
619 struct dentry *dentry, 619 struct dentry *dentry,
620 int mode, 620 umode_t mode,
621 struct nameidata *nd) 621 struct nameidata *nd)
622{ 622{
623 int ret; 623 int ret;
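[Editorial sketch] The ->create/->mkdir/->mknod methods now take umode_t. A hedged sketch of the inode_operations wiring this implies at this point in the series (the nameidata argument to ->create is still present, as the ocfs2 hunk shows; the "myfs" names are placeholders):

	/* Illustrative prototypes matching the signatures in the hunks above. */
	static int myfs_create(struct inode *dir, struct dentry *dentry,
			       umode_t mode, struct nameidata *nd);
	static int myfs_mkdir(struct inode *dir, struct dentry *dentry,
			      umode_t mode);
	static int myfs_mknod(struct inode *dir, struct dentry *dentry,
			      umode_t mode, dev_t dev);

	static const struct inode_operations myfs_dir_iops = {
		.create = myfs_create,
		.mkdir  = myfs_mkdir,
		.mknod  = myfs_mknod,
		/* ... lookup, unlink, etc. ... */
	};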
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 4994f8b0e604..604e12c4e979 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -108,7 +108,7 @@ static int ocfs2_parse_options(struct super_block *sb, char *options,
108 int is_remount); 108 int is_remount);
109static int ocfs2_check_set_options(struct super_block *sb, 109static int ocfs2_check_set_options(struct super_block *sb,
110 struct mount_options *options); 110 struct mount_options *options);
111static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt); 111static int ocfs2_show_options(struct seq_file *s, struct dentry *root);
112static void ocfs2_put_super(struct super_block *sb); 112static void ocfs2_put_super(struct super_block *sb);
113static int ocfs2_mount_volume(struct super_block *sb); 113static int ocfs2_mount_volume(struct super_block *sb);
114static int ocfs2_remount(struct super_block *sb, int *flags, char *data); 114static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -569,7 +569,6 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
569static void ocfs2_i_callback(struct rcu_head *head) 569static void ocfs2_i_callback(struct rcu_head *head)
570{ 570{
571 struct inode *inode = container_of(head, struct inode, i_rcu); 571 struct inode *inode = container_of(head, struct inode, i_rcu);
572 INIT_LIST_HEAD(&inode->i_dentry);
573 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode)); 572 kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
574} 573}
575 574
@@ -1534,9 +1533,9 @@ bail:
1534 return status; 1533 return status;
1535} 1534}
1536 1535
1537static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) 1536static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
1538{ 1537{
1539 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); 1538 struct ocfs2_super *osb = OCFS2_SB(root->d_sb);
1540 unsigned long opts = osb->s_mount_opt; 1539 unsigned long opts = osb->s_mount_opt;
1541 unsigned int local_alloc_megs; 1540 unsigned int local_alloc_megs;
1542 1541
@@ -1568,8 +1567,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1568 if (osb->preferred_slot != OCFS2_INVALID_SLOT) 1567 if (osb->preferred_slot != OCFS2_INVALID_SLOT)
1569 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot); 1568 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
1570 1569
1571 if (!(mnt->mnt_flags & MNT_NOATIME) && !(mnt->mnt_flags & MNT_RELATIME)) 1570 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
1572 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
1573 1571
1574 if (osb->osb_commit_interval) 1572 if (osb->osb_commit_interval)
1575 seq_printf(s, ",commit=%u", 1573 seq_printf(s, ",commit=%u",
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index aa9e8777b09a..0ba9ea1e7961 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -623,7 +623,7 @@ int ocfs2_calc_security_init(struct inode *dir,
623 623
624int ocfs2_calc_xattr_init(struct inode *dir, 624int ocfs2_calc_xattr_init(struct inode *dir,
625 struct buffer_head *dir_bh, 625 struct buffer_head *dir_bh,
626 int mode, 626 umode_t mode,
627 struct ocfs2_security_xattr_info *si, 627 struct ocfs2_security_xattr_info *si,
628 int *want_clusters, 628 int *want_clusters,
629 int *xattr_credits, 629 int *xattr_credits,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index d63cfb72316b..e5c7f15465b4 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -68,7 +68,7 @@ int ocfs2_calc_security_init(struct inode *,
68 struct ocfs2_security_xattr_info *, 68 struct ocfs2_security_xattr_info *,
69 int *, int *, struct ocfs2_alloc_context **); 69 int *, int *, struct ocfs2_alloc_context **);
70int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *, 70int ocfs2_calc_xattr_init(struct inode *, struct buffer_head *,
71 int, struct ocfs2_security_xattr_info *, 71 umode_t, struct ocfs2_security_xattr_info *,
72 int *, int *, int *); 72 int *, int *, int *);
73 73
74/* 74/*
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 98e544274390..f00576ec320f 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -255,7 +255,7 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry)
255 return 0; 255 return 0;
256} 256}
257 257
258static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode) 258static int omfs_add_node(struct inode *dir, struct dentry *dentry, umode_t mode)
259{ 259{
260 int err; 260 int err;
261 struct inode *inode = omfs_new_inode(dir, mode); 261 struct inode *inode = omfs_new_inode(dir, mode);
@@ -279,12 +279,12 @@ out_free_inode:
279 return err; 279 return err;
280} 280}
281 281
282static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 282static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
283{ 283{
284 return omfs_add_node(dir, dentry, mode | S_IFDIR); 284 return omfs_add_node(dir, dentry, mode | S_IFDIR);
285} 285}
286 286
287static int omfs_create(struct inode *dir, struct dentry *dentry, int mode, 287static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
288 struct nameidata *nd) 288 struct nameidata *nd)
289{ 289{
290 return omfs_add_node(dir, dentry, mode | S_IFREG); 290 return omfs_add_node(dir, dentry, mode | S_IFREG);
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index e043c4cb9a97..6065bb0ba207 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -28,7 +28,7 @@ struct buffer_head *omfs_bread(struct super_block *sb, sector_t block)
28 return sb_bread(sb, clus_to_blk(sbi, block)); 28 return sb_bread(sb, clus_to_blk(sbi, block));
29} 29}
30 30
31struct inode *omfs_new_inode(struct inode *dir, int mode) 31struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
32{ 32{
33 struct inode *inode; 33 struct inode *inode;
34 u64 new_block; 34 u64 new_block;
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
index 7d414fef501a..8941f12c6b01 100644
--- a/fs/omfs/omfs.h
+++ b/fs/omfs/omfs.h
@@ -60,7 +60,7 @@ extern int omfs_shrink_inode(struct inode *inode);
60/* inode.c */ 60/* inode.c */
61extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block); 61extern struct buffer_head *omfs_bread(struct super_block *sb, sector_t block);
62extern struct inode *omfs_iget(struct super_block *sb, ino_t inode); 62extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
63extern struct inode *omfs_new_inode(struct inode *dir, int mode); 63extern struct inode *omfs_new_inode(struct inode *dir, umode_t mode);
64extern int omfs_reserve_block(struct super_block *sb, sector_t block); 64extern int omfs_reserve_block(struct super_block *sb, sector_t block);
65extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino); 65extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino);
66extern int omfs_sync_inode(struct inode *inode); 66extern int omfs_sync_inode(struct inode *inode);
diff --git a/fs/open.c b/fs/open.c
index 22c41b543f2d..77becc041149 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -456,7 +456,7 @@ static int chmod_common(struct path *path, umode_t mode)
456 if (error) 456 if (error)
457 return error; 457 return error;
458 mutex_lock(&inode->i_mutex); 458 mutex_lock(&inode->i_mutex);
459 error = security_path_chmod(path->dentry, path->mnt, mode); 459 error = security_path_chmod(path, mode);
460 if (error) 460 if (error)
461 goto out_unlock; 461 goto out_unlock;
462 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 462 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
@@ -468,7 +468,7 @@ out_unlock:
468 return error; 468 return error;
469} 469}
470 470
471SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) 471SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
472{ 472{
473 struct file * file; 473 struct file * file;
474 int err = -EBADF; 474 int err = -EBADF;
@@ -482,7 +482,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
482 return err; 482 return err;
483} 483}
484 484
485SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) 485SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
486{ 486{
487 struct path path; 487 struct path path;
488 int error; 488 int error;
@@ -495,7 +495,7 @@ SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
495 return error; 495 return error;
496} 496}
497 497
498SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode) 498SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
499{ 499{
500 return sys_fchmodat(AT_FDCWD, filename, mode); 500 return sys_fchmodat(AT_FDCWD, filename, mode);
501} 501}
@@ -608,7 +608,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
608 dentry = file->f_path.dentry; 608 dentry = file->f_path.dentry;
609 audit_inode(NULL, dentry); 609 audit_inode(NULL, dentry);
610 error = chown_common(&file->f_path, user, group); 610 error = chown_common(&file->f_path, user, group);
611 mnt_drop_write(file->f_path.mnt); 611 mnt_drop_write_file(file);
612out_fput: 612out_fput:
613 fput(file); 613 fput(file);
614out: 614out:
@@ -877,7 +877,7 @@ void fd_install(unsigned int fd, struct file *file)
877 877
878EXPORT_SYMBOL(fd_install); 878EXPORT_SYMBOL(fd_install);
879 879
880static inline int build_open_flags(int flags, int mode, struct open_flags *op) 880static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
881{ 881{
882 int lookup_flags = 0; 882 int lookup_flags = 0;
883 int acc_mode; 883 int acc_mode;
@@ -948,7 +948,7 @@ static inline int build_open_flags(int flags, int mode, struct open_flags *op)
948 * have to. But in generally you should not do this, so please move 948 * have to. But in generally you should not do this, so please move
949 * along, nothing to see here.. 949 * along, nothing to see here..
950 */ 950 */
951struct file *filp_open(const char *filename, int flags, int mode) 951struct file *filp_open(const char *filename, int flags, umode_t mode)
952{ 952{
953 struct open_flags op; 953 struct open_flags op;
954 int lookup = build_open_flags(flags, mode, &op); 954 int lookup = build_open_flags(flags, mode, &op);
@@ -970,7 +970,7 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
970} 970}
971EXPORT_SYMBOL(file_open_root); 971EXPORT_SYMBOL(file_open_root);
972 972
973long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 973long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
974{ 974{
975 struct open_flags op; 975 struct open_flags op;
976 int lookup = build_open_flags(flags, mode, &op); 976 int lookup = build_open_flags(flags, mode, &op);
@@ -994,7 +994,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
994 return fd; 994 return fd;
995} 995}
996 996
997SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode) 997SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
998{ 998{
999 long ret; 999 long ret;
1000 1000
@@ -1008,7 +1008,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
1008} 1008}
1009 1009
1010SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, 1010SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
1011 int, mode) 1011 umode_t, mode)
1012{ 1012{
1013 long ret; 1013 long ret;
1014 1014
@@ -1027,7 +1027,7 @@ SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
1027 * For backward compatibility? Maybe this should be moved 1027 * For backward compatibility? Maybe this should be moved
1028 * into arch/i386 instead? 1028 * into arch/i386 instead?
1029 */ 1029 */
1030SYSCALL_DEFINE2(creat, const char __user *, pathname, int, mode) 1030SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
1031{ 1031{
1032 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); 1032 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
1033} 1033}
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index e4e0ff7962e2..a88c03bc749d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -346,7 +346,6 @@ static struct inode *openprom_alloc_inode(struct super_block *sb)
346static void openprom_i_callback(struct rcu_head *head) 346static void openprom_i_callback(struct rcu_head *head)
347{ 347{
348 struct inode *inode = container_of(head, struct inode, i_rcu); 348 struct inode *inode = container_of(head, struct inode, i_rcu);
349 INIT_LIST_HEAD(&inode->i_dentry);
350 kmem_cache_free(op_inode_cachep, OP_I(inode)); 349 kmem_cache_free(op_inode_cachep, OP_I(inode));
351} 350}
352 351
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
deleted file mode 100644
index cb5f0a3f1b03..000000000000
--- a/fs/partitions/Kconfig
+++ /dev/null
@@ -1,251 +0,0 @@
1#
2# Partition configuration
3#
4config PARTITION_ADVANCED
5 bool "Advanced partition selection"
6 help
7 Say Y here if you would like to use hard disks under Linux which
8 were partitioned under an operating system running on a different
9 architecture than your Linux system.
10
11 Note that the answer to this question won't directly affect the
12 kernel: saying N will just cause the configurator to skip all
13 the questions about foreign partitioning schemes.
14
15 If unsure, say N.
16
17config ACORN_PARTITION
18 bool "Acorn partition support" if PARTITION_ADVANCED
19 default y if ARCH_ACORN
20 help
21 Support hard disks partitioned under Acorn operating systems.
22
23config ACORN_PARTITION_CUMANA
24 bool "Cumana partition support" if PARTITION_ADVANCED
25 default y if ARCH_ACORN
26 depends on ACORN_PARTITION
27 help
28 Say Y here if you would like to use hard disks under Linux which
29 were partitioned using the Cumana interface on Acorn machines.
30
31config ACORN_PARTITION_EESOX
32 bool "EESOX partition support" if PARTITION_ADVANCED
33 default y if ARCH_ACORN
34 depends on ACORN_PARTITION
35
36config ACORN_PARTITION_ICS
37 bool "ICS partition support" if PARTITION_ADVANCED
38 default y if ARCH_ACORN
39 depends on ACORN_PARTITION
40 help
41 Say Y here if you would like to use hard disks under Linux which
42 were partitioned using the ICS interface on Acorn machines.
43
44config ACORN_PARTITION_ADFS
45 bool "Native filecore partition support" if PARTITION_ADVANCED
46 default y if ARCH_ACORN
47 depends on ACORN_PARTITION
48 help
49 The Acorn Disc Filing System is the standard file system of the
50 RiscOS operating system which runs on Acorn's ARM-based Risc PC
51 systems and the Acorn Archimedes range of machines. If you say
52 `Y' here, Linux will support disk partitions created under ADFS.
53
54config ACORN_PARTITION_POWERTEC
55 bool "PowerTec partition support" if PARTITION_ADVANCED
56 default y if ARCH_ACORN
57 depends on ACORN_PARTITION
58 help
59 Support reading partition tables created on Acorn machines using
60 the PowerTec SCSI drive.
61
62config ACORN_PARTITION_RISCIX
63 bool "RISCiX partition support" if PARTITION_ADVANCED
64 default y if ARCH_ACORN
65 depends on ACORN_PARTITION
66 help
67 Once upon a time, there was a native Unix port for the Acorn series
68 of machines called RISCiX. If you say 'Y' here, Linux will be able
69 to read disks partitioned under RISCiX.
70
71config OSF_PARTITION
72 bool "Alpha OSF partition support" if PARTITION_ADVANCED
73 default y if ALPHA
74 help
75 Say Y here if you would like to use hard disks under Linux which
76 were partitioned on an Alpha machine.
77
78config AMIGA_PARTITION
79 bool "Amiga partition table support" if PARTITION_ADVANCED
80 default y if (AMIGA || AFFS_FS=y)
81 help
82 Say Y here if you would like to use hard disks under Linux which
83 were partitioned under AmigaOS.
84
85config ATARI_PARTITION
86 bool "Atari partition table support" if PARTITION_ADVANCED
87 default y if ATARI
88 help
89 Say Y here if you would like to use hard disks under Linux which
90 were partitioned under the Atari OS.
91
92config IBM_PARTITION
93 bool "IBM disk label and partition support"
94 depends on PARTITION_ADVANCED && S390
95 help
96 Say Y here if you would like to be able to read the hard disk
97 partition table format used by IBM DASD disks operating under CMS.
98 Otherwise, say N.
99
100config MAC_PARTITION
101 bool "Macintosh partition map support" if PARTITION_ADVANCED
102 default y if (MAC || PPC_PMAC)
103 help
104 Say Y here if you would like to use hard disks under Linux which
105 were partitioned on a Macintosh.
106
107config MSDOS_PARTITION
108 bool "PC BIOS (MSDOS partition tables) support" if PARTITION_ADVANCED
109 default y
110 help
111 Say Y here.
112
113config BSD_DISKLABEL
114 bool "BSD disklabel (FreeBSD partition tables) support"
115 depends on PARTITION_ADVANCED && MSDOS_PARTITION
116 help
117 FreeBSD uses its own hard disk partition scheme on your PC. It
118 requires only one entry in the primary partition table of your disk
119 and manages it similarly to DOS extended partitions, putting in its
120 first sector a new partition table in BSD disklabel format. Saying Y
121 here allows you to read these disklabels and further mount FreeBSD
122 partitions from within Linux if you have also said Y to "UFS
123 file system support", above. If you don't know what all this is
124 about, say N.
125
126config MINIX_SUBPARTITION
127 bool "Minix subpartition support"
128 depends on PARTITION_ADVANCED && MSDOS_PARTITION
129 help
130 Minix 2.0.0/2.0.2 subpartition table support for Linux.
131 Say Y here if you want to mount and use Minix 2.0.0/2.0.2
132 subpartitions.
133
134config SOLARIS_X86_PARTITION
135 bool "Solaris (x86) partition table support"
136 depends on PARTITION_ADVANCED && MSDOS_PARTITION
137 help
138 Like most systems, Solaris x86 uses its own hard disk partition
139 table format, incompatible with all others. Saying Y here allows you
140 to read these partition tables and further mount Solaris x86
141 partitions from within Linux if you have also said Y to "UFS
142 file system support", above.
143
144config UNIXWARE_DISKLABEL
145 bool "Unixware slices support"
146 depends on PARTITION_ADVANCED && MSDOS_PARTITION
147 ---help---
148 Like some systems, UnixWare uses its own slice table inside a
149 partition (VTOC - Virtual Table of Contents). Its format is
150 incompatible with all other OSes. Saying Y here allows you to read
151 VTOC and further mount UnixWare partitions read-only from within
152 Linux if you have also said Y to "UFS file system support" or
153 "System V and Coherent file system support", above.
154
155 This is mainly used to carry data from a UnixWare box to your
156 Linux box via a removable medium like magneto-optical, ZIP or
157 removable IDE drives. Note, however, that a good portable way to
158 transport files and directories between unixes (and even other
159 operating systems) is given by the tar program ("man tar" or
160 preferably "info tar").
161
162 If you don't know what all this is about, say N.
163
164config LDM_PARTITION
165 bool "Windows Logical Disk Manager (Dynamic Disk) support"
166 depends on PARTITION_ADVANCED
167 ---help---
168 Say Y here if you would like to use hard disks under Linux which
169 were partitioned using Windows 2000's/XP's or Vista's Logical Disk
170 Manager. They are also known as "Dynamic Disks".
171
172 Note this driver only supports Dynamic Disks with a protective MBR
173 label, i.e. DOS partition table. It does not support GPT labelled
174 Dynamic Disks yet as can be created with Vista.
175
176 Windows 2000 introduced the concept of Dynamic Disks to get around
177 the limitations of the PC's partitioning scheme. The Logical Disk
178 Manager allows the user to repartition a disk and create spanned,
179 mirrored, striped or RAID volumes, all without the need for
180 rebooting.
181
182 Normal partitions are now called Basic Disks under Windows 2000, XP,
183 and Vista.
184
185 For a fuller description read <file:Documentation/ldm.txt>.
186
187 If unsure, say N.
188
189config LDM_DEBUG
190 bool "Windows LDM extra logging"
191 depends on LDM_PARTITION
192 help
193 Say Y here if you would like LDM to log verbosely. This could be
194 helpful if the driver doesn't work as expected and you'd like to
195 report a bug.
196
197 If unsure, say N.
198
199config SGI_PARTITION
200 bool "SGI partition support" if PARTITION_ADVANCED
201 default y if DEFAULT_SGI_PARTITION
202 help
203 Say Y here if you would like to be able to read the hard disk
204 partition table format used by SGI machines.
205
206config ULTRIX_PARTITION
207 bool "Ultrix partition table support" if PARTITION_ADVANCED
208 default y if MACH_DECSTATION
209 help
210 Say Y here if you would like to be able to read the hard disk
211 partition table format used by DEC (now Compaq) Ultrix machines.
212 Otherwise, say N.
213
214config SUN_PARTITION
215 bool "Sun partition tables support" if PARTITION_ADVANCED
216 default y if (SPARC || SUN3 || SUN3X)
217 ---help---
218 Like most systems, SunOS uses its own hard disk partition table
219 format, incompatible with all others. Saying Y here allows you to
220 read these partition tables and further mount SunOS partitions from
221 within Linux if you have also said Y to "UFS file system support",
222 above. This is mainly used to carry data from a SPARC under SunOS to
223 your Linux box via a removable medium like magneto-optical or ZIP
224 drives; note however that a good portable way to transport files and
225 directories between unixes (and even other operating systems) is
226 given by the tar program ("man tar" or preferably "info tar"). If
227 you don't know what all this is about, say N.
228
229config KARMA_PARTITION
230 bool "Karma Partition support"
231 depends on PARTITION_ADVANCED
232 help
233 Say Y here if you would like to mount the Rio Karma MP3 player, as it
234 uses a proprietary partition table.
235
236config EFI_PARTITION
237 bool "EFI GUID Partition support"
238 depends on PARTITION_ADVANCED
239 select CRC32
240 help
241 Say Y here if you would like to use hard disks under Linux which
242 were partitioned using EFI GPT.
243
244config SYSV68_PARTITION
245 bool "SYSV68 partition table support" if PARTITION_ADVANCED
246 default y if VME
247 help
248 Say Y here if you would like to be able to read the hard disk
249 partition table format used by Motorola Delta machines (using
250 sysv68).
251 Otherwise, say N.
diff --git a/fs/partitions/Makefile b/fs/partitions/Makefile
deleted file mode 100644
index 03af8eac51da..000000000000
--- a/fs/partitions/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-$(CONFIG_BLOCK) := check.o
6
7obj-$(CONFIG_ACORN_PARTITION) += acorn.o
8obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
9obj-$(CONFIG_ATARI_PARTITION) += atari.o
10obj-$(CONFIG_MAC_PARTITION) += mac.o
11obj-$(CONFIG_LDM_PARTITION) += ldm.o
12obj-$(CONFIG_MSDOS_PARTITION) += msdos.o
13obj-$(CONFIG_OSF_PARTITION) += osf.o
14obj-$(CONFIG_SGI_PARTITION) += sgi.o
15obj-$(CONFIG_SUN_PARTITION) += sun.o
16obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o
17obj-$(CONFIG_IBM_PARTITION) += ibm.o
18obj-$(CONFIG_EFI_PARTITION) += efi.o
19obj-$(CONFIG_KARMA_PARTITION) += karma.o
20obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c
deleted file mode 100644
index fbeb697374d5..000000000000
--- a/fs/partitions/acorn.c
+++ /dev/null
@@ -1,556 +0,0 @@
1/*
2 * linux/fs/partitions/acorn.c
3 *
4 * Copyright (c) 1996-2000 Russell King.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * Scan ADFS partitions on hard disk drives. Unfortunately, there
11 * isn't a standard for partitioning drives on Acorn machines, so
12 * every single manufacturer of SCSI and IDE cards created their own
13 * method.
14 */
15#include <linux/buffer_head.h>
16#include <linux/adfs_fs.h>
17
18#include "check.h"
19#include "acorn.h"
20
21/*
22 * Partition types. (Oh for reusability)
23 */
24#define PARTITION_RISCIX_MFM 1
25#define PARTITION_RISCIX_SCSI 2
26#define PARTITION_LINUX 9
27
28#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
29 defined(CONFIG_ACORN_PARTITION_ADFS)
30static struct adfs_discrecord *
31adfs_partition(struct parsed_partitions *state, char *name, char *data,
32 unsigned long first_sector, int slot)
33{
34 struct adfs_discrecord *dr;
35 unsigned int nr_sects;
36
37 if (adfs_checkbblk(data))
38 return NULL;
39
40 dr = (struct adfs_discrecord *)(data + 0x1c0);
41
42 if (dr->disc_size == 0 && dr->disc_size_high == 0)
43 return NULL;
44
45 nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) |
46 (le32_to_cpu(dr->disc_size) >> 9);
47
48 if (name) {
49 strlcat(state->pp_buf, " [", PAGE_SIZE);
50 strlcat(state->pp_buf, name, PAGE_SIZE);
51 strlcat(state->pp_buf, "]", PAGE_SIZE);
52 }
53 put_partition(state, slot, first_sector, nr_sects);
54 return dr;
55}
56#endif
57
58#ifdef CONFIG_ACORN_PARTITION_RISCIX
59
60struct riscix_part {
61 __le32 start;
62 __le32 length;
63 __le32 one;
64 char name[16];
65};
66
67struct riscix_record {
68 __le32 magic;
69#define RISCIX_MAGIC cpu_to_le32(0x4a657320)
70 __le32 date;
71 struct riscix_part part[8];
72};
73
74#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
75 defined(CONFIG_ACORN_PARTITION_ADFS)
76static int riscix_partition(struct parsed_partitions *state,
77 unsigned long first_sect, int slot,
78 unsigned long nr_sects)
79{
80 Sector sect;
81 struct riscix_record *rr;
82
83 rr = read_part_sector(state, first_sect, &sect);
84 if (!rr)
85 return -1;
86
87 strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE);
88
89
90 if (rr->magic == RISCIX_MAGIC) {
91 unsigned long size = nr_sects > 2 ? 2 : nr_sects;
92 int part;
93
94 strlcat(state->pp_buf, " <", PAGE_SIZE);
95
96 put_partition(state, slot++, first_sect, size);
97 for (part = 0; part < 8; part++) {
98 if (rr->part[part].one &&
99 memcmp(rr->part[part].name, "All\0", 4)) {
100 put_partition(state, slot++,
101 le32_to_cpu(rr->part[part].start),
102 le32_to_cpu(rr->part[part].length));
103 strlcat(state->pp_buf, "(", PAGE_SIZE);
104 strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE);
105 strlcat(state->pp_buf, ")", PAGE_SIZE);
106 }
107 }
108
109 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
110 } else {
111 put_partition(state, slot++, first_sect, nr_sects);
112 }
113
114 put_dev_sector(sect);
115 return slot;
116}
117#endif
118#endif
119
120#define LINUX_NATIVE_MAGIC 0xdeafa1de
121#define LINUX_SWAP_MAGIC 0xdeafab1e
122
123struct linux_part {
124 __le32 magic;
125 __le32 start_sect;
126 __le32 nr_sects;
127};
128
129#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \
130 defined(CONFIG_ACORN_PARTITION_ADFS)
131static int linux_partition(struct parsed_partitions *state,
132 unsigned long first_sect, int slot,
133 unsigned long nr_sects)
134{
135 Sector sect;
136 struct linux_part *linuxp;
137 unsigned long size = nr_sects > 2 ? 2 : nr_sects;
138
139 strlcat(state->pp_buf, " [Linux]", PAGE_SIZE);
140
141 put_partition(state, slot++, first_sect, size);
142
143 linuxp = read_part_sector(state, first_sect, &sect);
144 if (!linuxp)
145 return -1;
146
147 strlcat(state->pp_buf, " <", PAGE_SIZE);
148 while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) ||
149 linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) {
150 if (slot == state->limit)
151 break;
152 put_partition(state, slot++, first_sect +
153 le32_to_cpu(linuxp->start_sect),
154 le32_to_cpu(linuxp->nr_sects));
155 linuxp ++;
156 }
157 strlcat(state->pp_buf, " >", PAGE_SIZE);
158
159 put_dev_sector(sect);
160 return slot;
161}
162#endif
163
164#ifdef CONFIG_ACORN_PARTITION_CUMANA
165int adfspart_check_CUMANA(struct parsed_partitions *state)
166{
167 unsigned long first_sector = 0;
168 unsigned int start_blk = 0;
169 Sector sect;
170 unsigned char *data;
171 char *name = "CUMANA/ADFS";
172 int first = 1;
173 int slot = 1;
174
175 /*
176 * Try Cumana style partitions - sector 6 contains ADFS boot block
177 * with pointer to next 'drive'.
178 *
179 * There are unknowns in this code - is the 'cylinder number' of the
180 * next partition relative to the start of this one - I'm assuming
181 * it is.
182 *
183 * Also, which ID did Cumana use?
184 *
185 * This is totally unfinished, and will require more work to get it
186 * going. Hence it is totally untested.
187 */
188 do {
189 struct adfs_discrecord *dr;
190 unsigned int nr_sects;
191
192 data = read_part_sector(state, start_blk * 2 + 6, &sect);
193 if (!data)
194 return -1;
195
196 if (slot == state->limit)
197 break;
198
199 dr = adfs_partition(state, name, data, first_sector, slot++);
200 if (!dr)
201 break;
202
203 name = NULL;
204
205 nr_sects = (data[0x1fd] + (data[0x1fe] << 8)) *
206 (dr->heads + (dr->lowsector & 0x40 ? 1 : 0)) *
207 dr->secspertrack;
208
209 if (!nr_sects)
210 break;
211
212 first = 0;
213 first_sector += nr_sects;
214 start_blk += nr_sects >> (BLOCK_SIZE_BITS - 9);
215 nr_sects = 0; /* hmm - should be partition size */
216
217 switch (data[0x1fc] & 15) {
218 case 0: /* No partition / ADFS? */
219 break;
220
221#ifdef CONFIG_ACORN_PARTITION_RISCIX
222 case PARTITION_RISCIX_SCSI:
223 /* RISCiX - we don't know how to find the next one. */
224 slot = riscix_partition(state, first_sector, slot,
225 nr_sects);
226 break;
227#endif
228
229 case PARTITION_LINUX:
230 slot = linux_partition(state, first_sector, slot,
231 nr_sects);
232 break;
233 }
234 put_dev_sector(sect);
235 if (slot == -1)
236 return -1;
237 } while (1);
238 put_dev_sector(sect);
239 return first ? 0 : 1;
240}
241#endif
242
243#ifdef CONFIG_ACORN_PARTITION_ADFS
244/*
245 * Purpose: allocate ADFS partitions.
246 *
247 * Params : hd - pointer to gendisk structure to store partition info.
248 * dev - device number to access.
249 *
250 * Returns: -1 on error, 0 for no ADFS boot sector, 1 for ok.
251 *
252 * Alloc : hda = whole drive
253 * hda1 = ADFS partition on first drive.
254 * hda2 = non-ADFS partition.
255 */
256int adfspart_check_ADFS(struct parsed_partitions *state)
257{
258 unsigned long start_sect, nr_sects, sectscyl, heads;
259 Sector sect;
260 unsigned char *data;
261 struct adfs_discrecord *dr;
262 unsigned char id;
263 int slot = 1;
264
265 data = read_part_sector(state, 6, &sect);
266 if (!data)
267 return -1;
268
269 dr = adfs_partition(state, "ADFS", data, 0, slot++);
270 if (!dr) {
271 put_dev_sector(sect);
272 return 0;
273 }
274
275 heads = dr->heads + ((dr->lowsector >> 6) & 1);
276 sectscyl = dr->secspertrack * heads;
277 start_sect = ((data[0x1fe] << 8) + data[0x1fd]) * sectscyl;
278 id = data[0x1fc] & 15;
279 put_dev_sector(sect);
280
281 /*
282 * Work out start of non-adfs partition.
283 */
284 nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect;
285
286 if (start_sect) {
287 switch (id) {
288#ifdef CONFIG_ACORN_PARTITION_RISCIX
289 case PARTITION_RISCIX_SCSI:
290 case PARTITION_RISCIX_MFM:
291 slot = riscix_partition(state, start_sect, slot,
292 nr_sects);
293 break;
294#endif
295
296 case PARTITION_LINUX:
297 slot = linux_partition(state, start_sect, slot,
298 nr_sects);
299 break;
300 }
301 }
302 strlcat(state->pp_buf, "\n", PAGE_SIZE);
303 return 1;
304}
305#endif
306
307#ifdef CONFIG_ACORN_PARTITION_ICS
308
309struct ics_part {
310 __le32 start;
311 __le32 size;
312};
313
314static int adfspart_check_ICSLinux(struct parsed_partitions *state,
315 unsigned long block)
316{
317 Sector sect;
318 unsigned char *data = read_part_sector(state, block, &sect);
319 int result = 0;
320
321 if (data) {
322 if (memcmp(data, "LinuxPart", 9) == 0)
323 result = 1;
324 put_dev_sector(sect);
325 }
326
327 return result;
328}
329
330/*
331 * Check for a valid ICS partition using the checksum.
332 */
333static inline int valid_ics_sector(const unsigned char *data)
334{
335 unsigned long sum;
336 int i;
337
338 for (i = 0, sum = 0x50617274; i < 508; i++)
339 sum += data[i];
340
341 sum -= le32_to_cpu(*(__le32 *)(&data[508]));
342
343 return sum == 0;
344}
345
346/*
347 * Purpose: allocate ICS partitions.
348 * Params : hd - pointer to gendisk structure to store partition info.
349 * dev - device number to access.
350 * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok.
351 * Alloc : hda = whole drive
352 * hda1 = ADFS partition 0 on first drive.
353 * hda2 = ADFS partition 1 on first drive.
354 * ..etc..
355 */
356int adfspart_check_ICS(struct parsed_partitions *state)
357{
358 const unsigned char *data;
359 const struct ics_part *p;
360 int slot;
361 Sector sect;
362
363 /*
364 * Try ICS style partitions - sector 0 contains partition info.
365 */
366 data = read_part_sector(state, 0, &sect);
367 if (!data)
368 return -1;
369
370 if (!valid_ics_sector(data)) {
371 put_dev_sector(sect);
372 return 0;
373 }
374
375 strlcat(state->pp_buf, " [ICS]", PAGE_SIZE);
376
377 for (slot = 1, p = (const struct ics_part *)data; p->size; p++) {
378 u32 start = le32_to_cpu(p->start);
379 s32 size = le32_to_cpu(p->size); /* yes, it's signed. */
380
381 if (slot == state->limit)
382 break;
383
384 /*
385 * Negative sizes tell the RISC OS ICS driver to ignore
386 * this partition - in effect it says that this does not
387 * contain an ADFS filesystem.
388 */
389 if (size < 0) {
390 size = -size;
391
392 /*
393 * Our own extension - We use the first sector
394 * of the partition to identify what type this
395 * partition is. We must not make this visible
396 * to the filesystem.
397 */
398 if (size > 1 && adfspart_check_ICSLinux(state, start)) {
399 start += 1;
400 size -= 1;
401 }
402 }
403
404 if (size)
405 put_partition(state, slot++, start, size);
406 }
407
408 put_dev_sector(sect);
409 strlcat(state->pp_buf, "\n", PAGE_SIZE);
410 return 1;
411}
412#endif
413
414#ifdef CONFIG_ACORN_PARTITION_POWERTEC
415struct ptec_part {
416 __le32 unused1;
417 __le32 unused2;
418 __le32 start;
419 __le32 size;
420 __le32 unused5;
421 char type[8];
422};
423
424static inline int valid_ptec_sector(const unsigned char *data)
425{
426 unsigned char checksum = 0x2a;
427 int i;
428
429 /*
430 * If it looks like a PC/BIOS partition, then it
431 * probably isn't PowerTec.
432 */
433 if (data[510] == 0x55 && data[511] == 0xaa)
434 return 0;
435
436 for (i = 0; i < 511; i++)
437 checksum += data[i];
438
439 return checksum == data[511];
440}
441
442/*
443 * Purpose: allocate ICS partitions.
444 * Params : hd - pointer to gendisk structure to store partition info.
445 * dev - device number to access.
446 * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok.
447 * Alloc : hda = whole drive
448 * hda1 = ADFS partition 0 on first drive.
449 * hda2 = ADFS partition 1 on first drive.
450 * ..etc..
451 */
452int adfspart_check_POWERTEC(struct parsed_partitions *state)
453{
454 Sector sect;
455 const unsigned char *data;
456 const struct ptec_part *p;
457 int slot = 1;
458 int i;
459
460 data = read_part_sector(state, 0, &sect);
461 if (!data)
462 return -1;
463
464 if (!valid_ptec_sector(data)) {
465 put_dev_sector(sect);
466 return 0;
467 }
468
469 strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE);
470
471 for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) {
472 u32 start = le32_to_cpu(p->start);
473 u32 size = le32_to_cpu(p->size);
474
475 if (size)
476 put_partition(state, slot++, start, size);
477 }
478
479 put_dev_sector(sect);
480 strlcat(state->pp_buf, "\n", PAGE_SIZE);
481 return 1;
482}
483#endif
484
485#ifdef CONFIG_ACORN_PARTITION_EESOX
486struct eesox_part {
487 char magic[6];
488 char name[10];
489 __le32 start;
490 __le32 unused6;
491 __le32 unused7;
492 __le32 unused8;
493};
494
495/*
496 * Guess who created this format?
497 */
498static const char eesox_name[] = {
499 'N', 'e', 'i', 'l', ' ',
500 'C', 'r', 'i', 't', 'c', 'h', 'e', 'l', 'l', ' ', ' '
501};
502
503/*
504 * EESOX SCSI partition format.
505 *
506 * This is a goddamned awful partition format. We don't seem to store
507 * the size of the partition in this table, only the start addresses.
508 *
509 * There are two possibilities where the size comes from:
510 * 1. The individual ADFS boot block entries that are placed on the disk.
511 * 2. The start address of the next entry.
512 */
513int adfspart_check_EESOX(struct parsed_partitions *state)
514{
515 Sector sect;
516 const unsigned char *data;
517 unsigned char buffer[256];
518 struct eesox_part *p;
519 sector_t start = 0;
520 int i, slot = 1;
521
522 data = read_part_sector(state, 7, &sect);
523 if (!data)
524 return -1;
525
526 /*
527 * "Decrypt" the partition table. God knows why...
528 */
529 for (i = 0; i < 256; i++)
530 buffer[i] = data[i] ^ eesox_name[i & 15];
531
532 put_dev_sector(sect);
533
534 for (i = 0, p = (struct eesox_part *)buffer; i < 8; i++, p++) {
535 sector_t next;
536
537 if (memcmp(p->magic, "Eesox", 6))
538 break;
539
540 next = le32_to_cpu(p->start);
541 if (i)
542 put_partition(state, slot++, start, next - start);
543 start = next;
544 }
545
546 if (i != 0) {
547 sector_t size;
548
549 size = get_capacity(state->bdev->bd_disk);
550 put_partition(state, slot++, start, size - start);
551 strlcat(state->pp_buf, "\n", PAGE_SIZE);
552 }
553
554 return i ? 1 : 0;
555}
556#endif
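A hedged userspace sketch of the descrambling step in adfspart_check_EESOX() above: every byte of the table is XORed with a 16-byte key that repeats every 16 bytes, so applying the same operation twice restores the original. The key bytes below are placeholders, not the string the driver uses.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* XOR-descramble an EESOX table fragment in place; the key repeats every
 * 16 bytes, exactly as in the loop above. */
static void eesox_descramble(uint8_t *buf, size_t len, const uint8_t key[16])
{
	for (size_t i = 0; i < len; i++)
		buf[i] ^= key[i & 15];
}

int main(void)
{
	const uint8_t key[16] = "0123456789abcde";	/* placeholder key */
	uint8_t buf[32];

	memset(buf, 0xaa, sizeof(buf));
	eesox_descramble(buf, sizeof(buf), key);	/* scramble...    */
	eesox_descramble(buf, sizeof(buf), key);	/* ...and restore */
	printf("round trip ok: %d\n", buf[0] == 0xaa);
	return 0;
}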
diff --git a/fs/partitions/acorn.h b/fs/partitions/acorn.h
deleted file mode 100644
index ede828529692..000000000000
--- a/fs/partitions/acorn.h
+++ /dev/null
@@ -1,14 +0,0 @@
1/*
2 * linux/fs/partitions/acorn.h
3 *
4 * Copyright (C) 1996-2001 Russell King.
5 *
6 * I _hate_ this partitioning mess - why can't we have one defined
7 * format, and everyone stick to it?
8 */
9
10int adfspart_check_CUMANA(struct parsed_partitions *state);
11int adfspart_check_ADFS(struct parsed_partitions *state);
12int adfspart_check_ICS(struct parsed_partitions *state);
13int adfspart_check_POWERTEC(struct parsed_partitions *state);
14int adfspart_check_EESOX(struct parsed_partitions *state);
diff --git a/fs/partitions/amiga.c b/fs/partitions/amiga.c
deleted file mode 100644
index 70cbf44a1560..000000000000
--- a/fs/partitions/amiga.c
+++ /dev/null
@@ -1,139 +0,0 @@
1/*
2 * fs/partitions/amiga.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include <linux/types.h>
11#include <linux/affs_hardblocks.h>
12
13#include "check.h"
14#include "amiga.h"
15
16static __inline__ u32
17checksum_block(__be32 *m, int size)
18{
19 u32 sum = 0;
20
21 while (size--)
22 sum += be32_to_cpu(*m++);
23 return sum;
24}
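The validity test used below is that the 32-bit sum of the first rdb_SummedLongs big-endian longwords is zero, so a block's checksum word must be the negated sum of all the other words. A small userspace sketch of that fix-up; the checksum slot index is passed in here because the real offset is defined by the structures in <linux/affs_hardblocks.h>.

#include <stdint.h>
#include <stdio.h>

/* Pick the checksum word so the 32-bit sum over 'nlongs' longwords is 0,
 * which is the condition checksum_block() tests.  'words' is the block in
 * host order with the checksum slot cleared; 'idx' is that slot. */
static uint32_t rdb_checksum_fixup(const uint32_t *words, int nlongs, int idx)
{
	uint32_t sum = 0;

	for (int i = 0; i < nlongs; i++)
		if (i != idx)
			sum += words[i];

	return (uint32_t)-sum;		/* sum + fixup == 0 (mod 2^32) */
}

int main(void)
{
	uint32_t block[4] = { 0x5244534b, 64, 0, 7 };	/* made-up contents */

	block[2] = rdb_checksum_fixup(block, 4, 2);
	printf("checksum word: 0x%08x\n", (unsigned)block[2]);
	return 0;
}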
25
26int amiga_partition(struct parsed_partitions *state)
27{
28 Sector sect;
29 unsigned char *data;
30 struct RigidDiskBlock *rdb;
31 struct PartitionBlock *pb;
32 int start_sect, nr_sects, blk, part, res = 0;
33 int blksize = 1; /* Multiplier for disk block size */
34 int slot = 1;
35 char b[BDEVNAME_SIZE];
36
37 for (blk = 0; ; blk++, put_dev_sector(sect)) {
38 if (blk == RDB_ALLOCATION_LIMIT)
39 goto rdb_done;
40 data = read_part_sector(state, blk, &sect);
41 if (!data) {
42 if (warn_no_part)
43 printk("Dev %s: unable to read RDB block %d\n",
44 bdevname(state->bdev, b), blk);
45 res = -1;
46 goto rdb_done;
47 }
48 if (*(__be32 *)data != cpu_to_be32(IDNAME_RIGIDDISK))
49 continue;
50
51 rdb = (struct RigidDiskBlock *)data;
52 if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F) == 0)
53 break;
54 /* Try again with 0xdc..0xdf zeroed, Windows might have
55 * trashed it.
56 */
57 *(__be32 *)(data+0xdc) = 0;
58 if (checksum_block((__be32 *)data,
59 be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
60			printk("Warning: Trashed word at 0xdc in block %d "
61 "ignored in checksum calculation\n",blk);
62 break;
63 }
64
65 printk("Dev %s: RDB in block %d has bad checksum\n",
66 bdevname(state->bdev, b), blk);
67 }
68
69 /* blksize is blocks per 512 byte standard block */
70 blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512;
71
72 {
73 char tmp[7 + 10 + 1 + 1];
74
75 /* Be more informative */
76 snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512);
77 strlcat(state->pp_buf, tmp, PAGE_SIZE);
78 }
79 blk = be32_to_cpu(rdb->rdb_PartitionList);
80 put_dev_sector(sect);
81 for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
82		blk *= blksize;	/* Read in terms the partition table understands */
83 data = read_part_sector(state, blk, &sect);
84 if (!data) {
85 if (warn_no_part)
86 printk("Dev %s: unable to read partition block %d\n",
87 bdevname(state->bdev, b), blk);
88 res = -1;
89 goto rdb_done;
90 }
91 pb = (struct PartitionBlock *)data;
92 blk = be32_to_cpu(pb->pb_Next);
93 if (pb->pb_ID != cpu_to_be32(IDNAME_PARTITION))
94 continue;
95 if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 )
96 continue;
97
98 /* Tell Kernel about it */
99
100 nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 -
101 be32_to_cpu(pb->pb_Environment[9])) *
102 be32_to_cpu(pb->pb_Environment[3]) *
103 be32_to_cpu(pb->pb_Environment[5]) *
104 blksize;
105 if (!nr_sects)
106 continue;
107 start_sect = be32_to_cpu(pb->pb_Environment[9]) *
108 be32_to_cpu(pb->pb_Environment[3]) *
109 be32_to_cpu(pb->pb_Environment[5]) *
110 blksize;
111 put_partition(state,slot++,start_sect,nr_sects);
112 {
113 /* Be even more informative to aid mounting */
114 char dostype[4];
115 char tmp[42];
116
117 __be32 *dt = (__be32 *)dostype;
118 *dt = pb->pb_Environment[16];
119 if (dostype[3] < ' ')
120 snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)",
121 dostype[0], dostype[1],
122 dostype[2], dostype[3] + '@' );
123 else
124 snprintf(tmp, sizeof(tmp), " (%c%c%c%c)",
125 dostype[0], dostype[1],
126 dostype[2], dostype[3]);
127 strlcat(state->pp_buf, tmp, PAGE_SIZE);
128 snprintf(tmp, sizeof(tmp), "(res %d spb %d)",
129 be32_to_cpu(pb->pb_Environment[6]),
130 be32_to_cpu(pb->pb_Environment[4]));
131 strlcat(state->pp_buf, tmp, PAGE_SIZE);
132 }
133 res = 1;
134 }
135 strlcat(state->pp_buf, "\n", PAGE_SIZE);
136
137rdb_done:
138 return res;
139}
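The start/length arithmetic above multiplies cylinder, surface and blocks-per-track values taken from the partition block's environment vector; assuming the usual DosEnvVec layout (index 3 = surfaces, 5 = blocks per track, 9 = low cylinder, 10 = high cylinder; treat those index meanings as an assumption), it reduces to the following sketch with made-up geometry.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical mirror of the arithmetic above.  Field names follow the
 * usual DosEnvVec naming; the real layout lives in affs_hardblocks.h. */
struct amiga_geom {
	uint32_t surfaces;		/* Environment[3]  (assumed) */
	uint32_t blocks_per_track;	/* Environment[5]  (assumed) */
	uint32_t low_cyl;		/* Environment[9]  (assumed) */
	uint32_t high_cyl;		/* Environment[10] (assumed) */
};

int main(void)
{
	struct amiga_geom g = { 16, 63, 2, 1023 };	/* made-up numbers */
	uint32_t blksize = 1;				/* 512-byte blocks */

	uint64_t start = (uint64_t)g.low_cyl * g.surfaces *
			 g.blocks_per_track * blksize;
	uint64_t nsect = (uint64_t)(g.high_cyl - g.low_cyl + 1) *
			 g.surfaces * g.blocks_per_track * blksize;

	printf("start %llu, %llu sectors\n",
	       (unsigned long long)start, (unsigned long long)nsect);
	return 0;
}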
diff --git a/fs/partitions/amiga.h b/fs/partitions/amiga.h
deleted file mode 100644
index d094585cadaa..000000000000
--- a/fs/partitions/amiga.h
+++ /dev/null
@@ -1,6 +0,0 @@
1/*
2 * fs/partitions/amiga.h
3 */
4
5int amiga_partition(struct parsed_partitions *state);
6
diff --git a/fs/partitions/atari.c b/fs/partitions/atari.c
deleted file mode 100644
index 9875b05e80a2..000000000000
--- a/fs/partitions/atari.c
+++ /dev/null
@@ -1,149 +0,0 @@
1/*
2 * fs/partitions/atari.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include <linux/ctype.h>
11#include "check.h"
12#include "atari.h"
13
14/* ++guenther: this should be settable by the user ("make config")?
15 */
16#define ICD_PARTS
17
18/* check if a partition entry looks valid -- Atari format is assumed if at
19 least one of the primary entries is ok this way */
20#define VALID_PARTITION(pi,hdsiz) \
21 (((pi)->flg & 1) && \
22 isalnum((pi)->id[0]) && isalnum((pi)->id[1]) && isalnum((pi)->id[2]) && \
23 be32_to_cpu((pi)->st) <= (hdsiz) && \
24 be32_to_cpu((pi)->st) + be32_to_cpu((pi)->siz) <= (hdsiz))
25
26static inline int OK_id(char *s)
27{
28 return memcmp (s, "GEM", 3) == 0 || memcmp (s, "BGM", 3) == 0 ||
29 memcmp (s, "LNX", 3) == 0 || memcmp (s, "SWP", 3) == 0 ||
30 memcmp (s, "RAW", 3) == 0 ;
31}
32
33int atari_partition(struct parsed_partitions *state)
34{
35 Sector sect;
36 struct rootsector *rs;
37 struct partition_info *pi;
38 u32 extensect;
39 u32 hd_size;
40 int slot;
41#ifdef ICD_PARTS
42 int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
43#endif
44
45 rs = read_part_sector(state, 0, &sect);
46 if (!rs)
47 return -1;
48
49 /* Verify this is an Atari rootsector: */
50 hd_size = state->bdev->bd_inode->i_size >> 9;
51 if (!VALID_PARTITION(&rs->part[0], hd_size) &&
52 !VALID_PARTITION(&rs->part[1], hd_size) &&
53 !VALID_PARTITION(&rs->part[2], hd_size) &&
54 !VALID_PARTITION(&rs->part[3], hd_size)) {
55		/*
56		 * If there's no valid primary partition, assume there is no
57		 * Atari-format partition table (there's no reliable magic or
58		 * the like :-()
59		 */
60 put_dev_sector(sect);
61 return 0;
62 }
63
64 pi = &rs->part[0];
65 strlcat(state->pp_buf, " AHDI", PAGE_SIZE);
66 for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) {
67 struct rootsector *xrs;
68 Sector sect2;
69 ulong partsect;
70
71 if ( !(pi->flg & 1) )
72 continue;
73 /* active partition */
74 if (memcmp (pi->id, "XGM", 3) != 0) {
75 /* we don't care about other id's */
76 put_partition (state, slot, be32_to_cpu(pi->st),
77 be32_to_cpu(pi->siz));
78 continue;
79 }
80 /* extension partition */
81#ifdef ICD_PARTS
82 part_fmt = 1;
83#endif
84 strlcat(state->pp_buf, " XGM<", PAGE_SIZE);
85 partsect = extensect = be32_to_cpu(pi->st);
86 while (1) {
87 xrs = read_part_sector(state, partsect, &sect2);
88 if (!xrs) {
89 printk (" block %ld read failed\n", partsect);
90 put_dev_sector(sect);
91 return -1;
92 }
93
94 /* ++roman: sanity check: bit 0 of flg field must be set */
95 if (!(xrs->part[0].flg & 1)) {
96 printk( "\nFirst sub-partition in extended partition is not valid!\n" );
97 put_dev_sector(sect2);
98 break;
99 }
100
101 put_partition(state, slot,
102 partsect + be32_to_cpu(xrs->part[0].st),
103 be32_to_cpu(xrs->part[0].siz));
104
105 if (!(xrs->part[1].flg & 1)) {
106 /* end of linked partition list */
107 put_dev_sector(sect2);
108 break;
109 }
110 if (memcmp( xrs->part[1].id, "XGM", 3 ) != 0) {
111 printk("\nID of extended partition is not XGM!\n");
112 put_dev_sector(sect2);
113 break;
114 }
115
116 partsect = be32_to_cpu(xrs->part[1].st) + extensect;
117 put_dev_sector(sect2);
118 if (++slot == state->limit) {
119 printk( "\nMaximum number of partitions reached!\n" );
120 break;
121 }
122 }
123 strlcat(state->pp_buf, " >", PAGE_SIZE);
124 }
125#ifdef ICD_PARTS
126 if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */
127 pi = &rs->icdpart[0];
128 /* sanity check: no ICD format if first partition invalid */
129 if (OK_id(pi->id)) {
130 strlcat(state->pp_buf, " ICD<", PAGE_SIZE);
131 for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) {
132 /* accept only GEM,BGM,RAW,LNX,SWP partitions */
133 if (!((pi->flg & 1) && OK_id(pi->id)))
134 continue;
135 part_fmt = 2;
136 put_partition (state, slot,
137 be32_to_cpu(pi->st),
138 be32_to_cpu(pi->siz));
139 }
140 strlcat(state->pp_buf, " >", PAGE_SIZE);
141 }
142 }
143#endif
144 put_dev_sector(sect);
145
146 strlcat(state->pp_buf, "\n", PAGE_SIZE);
147
148 return 1;
149}
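Rewriting the VALID_PARTITION() test from the top of this file as a plain function makes the four conditions easier to read; this is only a sketch on host-order fields, not a drop-in replacement for the macro.

#include <ctype.h>
#include <stdint.h>

/* Host-order variant of the AHDI validity test: entry flagged active,
 * three-character alphanumeric id, start and start+size within the disk. */
struct ahdi_entry {
	uint8_t  flg;
	char     id[3];
	uint32_t st;	/* start, in sectors (already byte-swapped) */
	uint32_t siz;	/* size, in sectors (already byte-swapped)  */
};

static int ahdi_entry_valid(const struct ahdi_entry *pi, uint32_t hd_size)
{
	return (pi->flg & 1) &&
	       isalnum((unsigned char)pi->id[0]) &&
	       isalnum((unsigned char)pi->id[1]) &&
	       isalnum((unsigned char)pi->id[2]) &&
	       pi->st <= hd_size &&
	       pi->st + pi->siz <= hd_size;
}

int main(void)
{
	struct ahdi_entry e = { 0x01, { 'G', 'E', 'M' }, 1024, 4096 };

	return !ahdi_entry_valid(&e, 1u << 20);	/* exit 0 when valid */
}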
diff --git a/fs/partitions/atari.h b/fs/partitions/atari.h
deleted file mode 100644
index fe2d32a89f36..000000000000
--- a/fs/partitions/atari.h
+++ /dev/null
@@ -1,34 +0,0 @@
1/*
2 * fs/partitions/atari.h
3 * Moved by Russell King from:
4 *
5 * linux/include/linux/atari_rootsec.h
6 * definitions for Atari Rootsector layout
7 * by Andreas Schwab (schwab@ls5.informatik.uni-dortmund.de)
8 *
9 * modified for ICD/Supra partitioning scheme restricted to at most 12
10 * partitions
11 * by Guenther Kelleter (guenther@pool.informatik.rwth-aachen.de)
12 */
13
14struct partition_info
15{
16 u8 flg; /* bit 0: active; bit 7: bootable */
17 char id[3]; /* "GEM", "BGM", "XGM", or other */
18 __be32 st; /* start of partition */
19 __be32 siz; /* length of partition */
20};
21
22struct rootsector
23{
24 char unused[0x156]; /* room for boot code */
25 struct partition_info icdpart[8]; /* info for ICD-partitions 5..12 */
26 char unused2[0xc];
27 u32 hd_siz; /* size of disk in blocks */
28 struct partition_info part[4];
29 u32 bsl_st; /* start of bad sector list */
30 u32 bsl_cnt; /* length of bad sector list */
31 u16 checksum; /* checksum for bootable disks */
32} __attribute__((__packed__));
33
34int atari_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
deleted file mode 100644
index e3c63d1c5e13..000000000000
--- a/fs/partitions/check.c
+++ /dev/null
@@ -1,687 +0,0 @@
1/*
2 * fs/partitions/check.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 * Copyright (C) 1991-1998 Linus Torvalds
6 * Re-organised Feb 1998 Russell King
7 *
8 * We now have independent partition support from the
9 * block drivers, which allows all the partition code to
10 * be grouped in one location and to be mostly
11 * self-contained.
12 *
13 * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl}
14 */
15
16#include <linux/init.h>
17#include <linux/module.h>
18#include <linux/fs.h>
19#include <linux/slab.h>
20#include <linux/kmod.h>
21#include <linux/ctype.h>
22#include <linux/genhd.h>
23#include <linux/blktrace_api.h>
24
25#include "check.h"
26
27#include "acorn.h"
28#include "amiga.h"
29#include "atari.h"
30#include "ldm.h"
31#include "mac.h"
32#include "msdos.h"
33#include "osf.h"
34#include "sgi.h"
35#include "sun.h"
36#include "ibm.h"
37#include "ultrix.h"
38#include "efi.h"
39#include "karma.h"
40#include "sysv68.h"
41
42#ifdef CONFIG_BLK_DEV_MD
43extern void md_autodetect_dev(dev_t dev);
44#endif
45
46int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
47
48static int (*check_part[])(struct parsed_partitions *) = {
49 /*
50 * Probe partition formats with tables at disk address 0
51 * that also have an ADFS boot block at 0xdc0.
52 */
53#ifdef CONFIG_ACORN_PARTITION_ICS
54 adfspart_check_ICS,
55#endif
56#ifdef CONFIG_ACORN_PARTITION_POWERTEC
57 adfspart_check_POWERTEC,
58#endif
59#ifdef CONFIG_ACORN_PARTITION_EESOX
60 adfspart_check_EESOX,
61#endif
62
63 /*
64 * Now move on to formats that only have partition info at
65 * disk address 0xdc0. Since these may also have stale
66 * PC/BIOS partition tables, they need to come before
67 * the msdos entry.
68 */
69#ifdef CONFIG_ACORN_PARTITION_CUMANA
70 adfspart_check_CUMANA,
71#endif
72#ifdef CONFIG_ACORN_PARTITION_ADFS
73 adfspart_check_ADFS,
74#endif
75
76#ifdef CONFIG_EFI_PARTITION
77 efi_partition, /* this must come before msdos */
78#endif
79#ifdef CONFIG_SGI_PARTITION
80 sgi_partition,
81#endif
82#ifdef CONFIG_LDM_PARTITION
83 ldm_partition, /* this must come before msdos */
84#endif
85#ifdef CONFIG_MSDOS_PARTITION
86 msdos_partition,
87#endif
88#ifdef CONFIG_OSF_PARTITION
89 osf_partition,
90#endif
91#ifdef CONFIG_SUN_PARTITION
92 sun_partition,
93#endif
94#ifdef CONFIG_AMIGA_PARTITION
95 amiga_partition,
96#endif
97#ifdef CONFIG_ATARI_PARTITION
98 atari_partition,
99#endif
100#ifdef CONFIG_MAC_PARTITION
101 mac_partition,
102#endif
103#ifdef CONFIG_ULTRIX_PARTITION
104 ultrix_partition,
105#endif
106#ifdef CONFIG_IBM_PARTITION
107 ibm_partition,
108#endif
109#ifdef CONFIG_KARMA_PARTITION
110 karma_partition,
111#endif
112#ifdef CONFIG_SYSV68_PARTITION
113 sysv68_partition,
114#endif
115 NULL
116};
117
118/*
119 * disk_name() is used by partition check code and the genhd driver.
120 * It formats the devicename of the indicated disk into
121 * the supplied buffer (of size at least 32), and returns
122 * a pointer to that same buffer (for convenience).
123 */
124
125char *disk_name(struct gendisk *hd, int partno, char *buf)
126{
127 if (!partno)
128 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
129 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
130 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
131 else
132 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);
133
134 return buf;
135}
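The suffix rule above is: no suffix for the whole disk, a bare partition number when the disk name ends in a letter, and a 'p' separator when it ends in a digit (so 'sda' gives 'sda1' while 'mmcblk0' gives 'mmcblk0p1'). A standalone sketch of the same rule:

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Userspace rendering of the same rule; 32 stands in for BDEVNAME_SIZE. */
static void format_part_name(char *buf, size_t len,
			     const char *disk, int partno)
{
	if (!partno)
		snprintf(buf, len, "%s", disk);
	else if (isdigit((unsigned char)disk[strlen(disk) - 1]))
		snprintf(buf, len, "%sp%d", disk, partno);
	else
		snprintf(buf, len, "%s%d", disk, partno);
}

int main(void)
{
	char buf[32];

	format_part_name(buf, sizeof(buf), "sda", 1);
	printf("%s\n", buf);		/* sda1 */
	format_part_name(buf, sizeof(buf), "mmcblk0", 1);
	printf("%s\n", buf);		/* mmcblk0p1 */
	return 0;
}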
136
137const char *bdevname(struct block_device *bdev, char *buf)
138{
139 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
140}
141
142EXPORT_SYMBOL(bdevname);
143
144/*
145 * There's very little reason to use this; you should really
146 * have a struct block_device just about everywhere and use
147 * bdevname() instead.
148 */
149const char *__bdevname(dev_t dev, char *buffer)
150{
151 scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
152 MAJOR(dev), MINOR(dev));
153 return buffer;
154}
155
156EXPORT_SYMBOL(__bdevname);
157
158static struct parsed_partitions *
159check_partition(struct gendisk *hd, struct block_device *bdev)
160{
161 struct parsed_partitions *state;
162 int i, res, err;
163
164 state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL);
165 if (!state)
166 return NULL;
167 state->pp_buf = (char *)__get_free_page(GFP_KERNEL);
168 if (!state->pp_buf) {
169 kfree(state);
170 return NULL;
171 }
172 state->pp_buf[0] = '\0';
173
174 state->bdev = bdev;
175 disk_name(hd, 0, state->name);
176 snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name);
177 if (isdigit(state->name[strlen(state->name)-1]))
178 sprintf(state->name, "p");
179
180 state->limit = disk_max_parts(hd);
181 i = res = err = 0;
182 while (!res && check_part[i]) {
183 memset(&state->parts, 0, sizeof(state->parts));
184 res = check_part[i++](state);
185 if (res < 0) {
186 /* We have hit an I/O error which we don't report now.
187 * But record it, and let the others do their job.
188 */
189 err = res;
190 res = 0;
191 }
192
193 }
194 if (res > 0) {
195 printk(KERN_INFO "%s", state->pp_buf);
196
197 free_page((unsigned long)state->pp_buf);
198 return state;
199 }
200 if (state->access_beyond_eod)
201 err = -ENOSPC;
202 if (err)
203 /* The partition is unrecognized. So report I/O errors if there were any */
204 res = err;
205 if (!res)
206 strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE);
207 else if (warn_no_part)
208 strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE);
209
210 printk(KERN_INFO "%s", state->pp_buf);
211
212 free_page((unsigned long)state->pp_buf);
213 kfree(state);
214 return ERR_PTR(res);
215}
216
217static ssize_t part_partition_show(struct device *dev,
218 struct device_attribute *attr, char *buf)
219{
220 struct hd_struct *p = dev_to_part(dev);
221
222 return sprintf(buf, "%d\n", p->partno);
223}
224
225static ssize_t part_start_show(struct device *dev,
226 struct device_attribute *attr, char *buf)
227{
228 struct hd_struct *p = dev_to_part(dev);
229
230 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
231}
232
233ssize_t part_size_show(struct device *dev,
234 struct device_attribute *attr, char *buf)
235{
236 struct hd_struct *p = dev_to_part(dev);
237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
238}
239
240static ssize_t part_ro_show(struct device *dev,
241 struct device_attribute *attr, char *buf)
242{
243 struct hd_struct *p = dev_to_part(dev);
244 return sprintf(buf, "%d\n", p->policy ? 1 : 0);
245}
246
247static ssize_t part_alignment_offset_show(struct device *dev,
248 struct device_attribute *attr, char *buf)
249{
250 struct hd_struct *p = dev_to_part(dev);
251 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
252}
253
254static ssize_t part_discard_alignment_show(struct device *dev,
255 struct device_attribute *attr, char *buf)
256{
257 struct hd_struct *p = dev_to_part(dev);
258 return sprintf(buf, "%u\n", p->discard_alignment);
259}
260
261ssize_t part_stat_show(struct device *dev,
262 struct device_attribute *attr, char *buf)
263{
264 struct hd_struct *p = dev_to_part(dev);
265 int cpu;
266
267 cpu = part_stat_lock();
268 part_round_stats(cpu, p);
269 part_stat_unlock();
270 return sprintf(buf,
271 "%8lu %8lu %8llu %8u "
272 "%8lu %8lu %8llu %8u "
273 "%8u %8u %8u"
274 "\n",
275 part_stat_read(p, ios[READ]),
276 part_stat_read(p, merges[READ]),
277 (unsigned long long)part_stat_read(p, sectors[READ]),
278 jiffies_to_msecs(part_stat_read(p, ticks[READ])),
279 part_stat_read(p, ios[WRITE]),
280 part_stat_read(p, merges[WRITE]),
281 (unsigned long long)part_stat_read(p, sectors[WRITE]),
282 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
283 part_in_flight(p),
284 jiffies_to_msecs(part_stat_read(p, io_ticks)),
285 jiffies_to_msecs(part_stat_read(p, time_in_queue)));
286}
287
288ssize_t part_inflight_show(struct device *dev,
289 struct device_attribute *attr, char *buf)
290{
291 struct hd_struct *p = dev_to_part(dev);
292
293 return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
294 atomic_read(&p->in_flight[1]));
295}
296
297#ifdef CONFIG_FAIL_MAKE_REQUEST
298ssize_t part_fail_show(struct device *dev,
299 struct device_attribute *attr, char *buf)
300{
301 struct hd_struct *p = dev_to_part(dev);
302
303 return sprintf(buf, "%d\n", p->make_it_fail);
304}
305
306ssize_t part_fail_store(struct device *dev,
307 struct device_attribute *attr,
308 const char *buf, size_t count)
309{
310 struct hd_struct *p = dev_to_part(dev);
311 int i;
312
313 if (count > 0 && sscanf(buf, "%d", &i) > 0)
314 p->make_it_fail = (i == 0) ? 0 : 1;
315
316 return count;
317}
318#endif
319
320static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
321static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
322static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
323static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
324static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
325static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
326 NULL);
327static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
328static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
329#ifdef CONFIG_FAIL_MAKE_REQUEST
330static struct device_attribute dev_attr_fail =
331 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
332#endif
333
334static struct attribute *part_attrs[] = {
335 &dev_attr_partition.attr,
336 &dev_attr_start.attr,
337 &dev_attr_size.attr,
338 &dev_attr_ro.attr,
339 &dev_attr_alignment_offset.attr,
340 &dev_attr_discard_alignment.attr,
341 &dev_attr_stat.attr,
342 &dev_attr_inflight.attr,
343#ifdef CONFIG_FAIL_MAKE_REQUEST
344 &dev_attr_fail.attr,
345#endif
346 NULL
347};
348
349static struct attribute_group part_attr_group = {
350 .attrs = part_attrs,
351};
352
353static const struct attribute_group *part_attr_groups[] = {
354 &part_attr_group,
355#ifdef CONFIG_BLK_DEV_IO_TRACE
356 &blk_trace_attr_group,
357#endif
358 NULL
359};
360
361static void part_release(struct device *dev)
362{
363 struct hd_struct *p = dev_to_part(dev);
364 free_part_stats(p);
365 free_part_info(p);
366 kfree(p);
367}
368
369struct device_type part_type = {
370 .name = "partition",
371 .groups = part_attr_groups,
372 .release = part_release,
373};
374
375static void delete_partition_rcu_cb(struct rcu_head *head)
376{
377 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head);
378
379 part->start_sect = 0;
380 part->nr_sects = 0;
381 part_stat_set_all(part, 0);
382 put_device(part_to_dev(part));
383}
384
385void __delete_partition(struct hd_struct *part)
386{
387 call_rcu(&part->rcu_head, delete_partition_rcu_cb);
388}
389
390void delete_partition(struct gendisk *disk, int partno)
391{
392 struct disk_part_tbl *ptbl = disk->part_tbl;
393 struct hd_struct *part;
394
395 if (partno >= ptbl->len)
396 return;
397
398 part = ptbl->part[partno];
399 if (!part)
400 return;
401
402 blk_free_devt(part_devt(part));
403 rcu_assign_pointer(ptbl->part[partno], NULL);
404 rcu_assign_pointer(ptbl->last_lookup, NULL);
405 kobject_put(part->holder_dir);
406 device_del(part_to_dev(part));
407
408 hd_struct_put(part);
409}
410
411static ssize_t whole_disk_show(struct device *dev,
412 struct device_attribute *attr, char *buf)
413{
414 return 0;
415}
416static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
417 whole_disk_show, NULL);
418
419struct hd_struct *add_partition(struct gendisk *disk, int partno,
420 sector_t start, sector_t len, int flags,
421 struct partition_meta_info *info)
422{
423 struct hd_struct *p;
424 dev_t devt = MKDEV(0, 0);
425 struct device *ddev = disk_to_dev(disk);
426 struct device *pdev;
427 struct disk_part_tbl *ptbl;
428 const char *dname;
429 int err;
430
431 err = disk_expand_part_tbl(disk, partno);
432 if (err)
433 return ERR_PTR(err);
434 ptbl = disk->part_tbl;
435
436 if (ptbl->part[partno])
437 return ERR_PTR(-EBUSY);
438
439 p = kzalloc(sizeof(*p), GFP_KERNEL);
440 if (!p)
441 return ERR_PTR(-EBUSY);
442
443 if (!init_part_stats(p)) {
444 err = -ENOMEM;
445 goto out_free;
446 }
447 pdev = part_to_dev(p);
448
449 p->start_sect = start;
450 p->alignment_offset =
451 queue_limit_alignment_offset(&disk->queue->limits, start);
452 p->discard_alignment =
453 queue_limit_discard_alignment(&disk->queue->limits, start);
454 p->nr_sects = len;
455 p->partno = partno;
456 p->policy = get_disk_ro(disk);
457
458 if (info) {
459 struct partition_meta_info *pinfo = alloc_part_info(disk);
460 if (!pinfo)
461 goto out_free_stats;
462 memcpy(pinfo, info, sizeof(*info));
463 p->info = pinfo;
464 }
465
466 dname = dev_name(ddev);
467 if (isdigit(dname[strlen(dname) - 1]))
468 dev_set_name(pdev, "%sp%d", dname, partno);
469 else
470 dev_set_name(pdev, "%s%d", dname, partno);
471
472 device_initialize(pdev);
473 pdev->class = &block_class;
474 pdev->type = &part_type;
475 pdev->parent = ddev;
476
477 err = blk_alloc_devt(p, &devt);
478 if (err)
479 goto out_free_info;
480 pdev->devt = devt;
481
482 /* delay uevent until 'holders' subdir is created */
483 dev_set_uevent_suppress(pdev, 1);
484 err = device_add(pdev);
485 if (err)
486 goto out_put;
487
488 err = -ENOMEM;
489 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
490 if (!p->holder_dir)
491 goto out_del;
492
493 dev_set_uevent_suppress(pdev, 0);
494 if (flags & ADDPART_FLAG_WHOLEDISK) {
495 err = device_create_file(pdev, &dev_attr_whole_disk);
496 if (err)
497 goto out_del;
498 }
499
500 /* everything is up and running, commence */
501 rcu_assign_pointer(ptbl->part[partno], p);
502
503 /* suppress uevent if the disk suppresses it */
504 if (!dev_get_uevent_suppress(ddev))
505 kobject_uevent(&pdev->kobj, KOBJ_ADD);
506
507 hd_ref_init(p);
508 return p;
509
510out_free_info:
511 free_part_info(p);
512out_free_stats:
513 free_part_stats(p);
514out_free:
515 kfree(p);
516 return ERR_PTR(err);
517out_del:
518 kobject_put(p->holder_dir);
519 device_del(pdev);
520out_put:
521 put_device(pdev);
522 blk_free_devt(devt);
523 return ERR_PTR(err);
524}
525
526static bool disk_unlock_native_capacity(struct gendisk *disk)
527{
528 const struct block_device_operations *bdops = disk->fops;
529
530 if (bdops->unlock_native_capacity &&
531 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
532 printk(KERN_CONT "enabling native capacity\n");
533 bdops->unlock_native_capacity(disk);
534 disk->flags |= GENHD_FL_NATIVE_CAPACITY;
535 return true;
536 } else {
537 printk(KERN_CONT "truncated\n");
538 return false;
539 }
540}
541
542int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
543{
544 struct parsed_partitions *state = NULL;
545 struct disk_part_iter piter;
546 struct hd_struct *part;
547 int p, highest, res;
548rescan:
549 if (state && !IS_ERR(state)) {
550 kfree(state);
551 state = NULL;
552 }
553
554 if (bdev->bd_part_count)
555 return -EBUSY;
556 res = invalidate_partition(disk, 0);
557 if (res)
558 return res;
559
560 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
561 while ((part = disk_part_iter_next(&piter)))
562 delete_partition(disk, part->partno);
563 disk_part_iter_exit(&piter);
564
565 if (disk->fops->revalidate_disk)
566 disk->fops->revalidate_disk(disk);
567 check_disk_size_change(disk, bdev);
568 bdev->bd_invalidated = 0;
569 if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
570 return 0;
571 if (IS_ERR(state)) {
572 /*
573 * I/O error reading the partition table. If any
574 * partition code tried to read beyond EOD, retry
575 * after unlocking native capacity.
576 */
577 if (PTR_ERR(state) == -ENOSPC) {
578 printk(KERN_WARNING "%s: partition table beyond EOD, ",
579 disk->disk_name);
580 if (disk_unlock_native_capacity(disk))
581 goto rescan;
582 }
583 return -EIO;
584 }
585 /*
586 * If any partition code tried to read beyond EOD, try
587 * unlocking native capacity even if partition table is
588 * successfully read as we could be missing some partitions.
589 */
590 if (state->access_beyond_eod) {
591 printk(KERN_WARNING
592 "%s: partition table partially beyond EOD, ",
593 disk->disk_name);
594 if (disk_unlock_native_capacity(disk))
595 goto rescan;
596 }
597
598 /* tell userspace that the media / partition table may have changed */
599 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
600
601 /* Detect the highest partition number and preallocate
602 * disk->part_tbl. This is an optimization and not strictly
603 * necessary.
604 */
605 for (p = 1, highest = 0; p < state->limit; p++)
606 if (state->parts[p].size)
607 highest = p;
608
609 disk_expand_part_tbl(disk, highest);
610
611 /* add partitions */
612 for (p = 1; p < state->limit; p++) {
613 sector_t size, from;
614 struct partition_meta_info *info = NULL;
615
616 size = state->parts[p].size;
617 if (!size)
618 continue;
619
620 from = state->parts[p].from;
621 if (from >= get_capacity(disk)) {
622 printk(KERN_WARNING
623 "%s: p%d start %llu is beyond EOD, ",
624 disk->disk_name, p, (unsigned long long) from);
625 if (disk_unlock_native_capacity(disk))
626 goto rescan;
627 continue;
628 }
629
630 if (from + size > get_capacity(disk)) {
631 printk(KERN_WARNING
632 "%s: p%d size %llu extends beyond EOD, ",
633 disk->disk_name, p, (unsigned long long) size);
634
635 if (disk_unlock_native_capacity(disk)) {
636 /* free state and restart */
637 goto rescan;
638 } else {
639 /*
640				 * we cannot ignore partitions of broken tables
641				 * created by, for example, camera firmware, but
642 * we limit them to the end of the disk to avoid
643 * creating invalid block devices
644 */
645 size = get_capacity(disk) - from;
646 }
647 }
648
649 if (state->parts[p].has_info)
650 info = &state->parts[p].info;
651 part = add_partition(disk, p, from, size,
652 state->parts[p].flags,
653 &state->parts[p].info);
654 if (IS_ERR(part)) {
655 printk(KERN_ERR " %s: p%d could not be added: %ld\n",
656 disk->disk_name, p, -PTR_ERR(part));
657 continue;
658 }
659#ifdef CONFIG_BLK_DEV_MD
660 if (state->parts[p].flags & ADDPART_FLAG_RAID)
661 md_autodetect_dev(part_to_dev(part)->devt);
662#endif
663 }
664 kfree(state);
665 return 0;
666}
667
668unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
669{
670 struct address_space *mapping = bdev->bd_inode->i_mapping;
671 struct page *page;
672
673 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)),
674 NULL);
675 if (!IS_ERR(page)) {
676 if (PageError(page))
677 goto fail;
678 p->v = page;
679 return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9);
680fail:
681 page_cache_release(page);
682 }
683 p->v = NULL;
684 return NULL;
685}
686
687EXPORT_SYMBOL(read_dev_sector);
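For reference, the index arithmetic above maps a 512-byte sector number to a page-cache page and a byte offset inside it; assuming the common 4 KiB page size (PAGE_CACHE_SHIFT == 12, i.e. eight sectors per page), it works out as follows.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const unsigned page_shift = 12;			/* assume 4 KiB pages */
	const unsigned sectors_per_page = 1u << (page_shift - 9);
	uint64_t sector = 12345;			/* arbitrary example  */

	uint64_t page_index = sector >> (page_shift - 9);
	unsigned offset = (sector & (sectors_per_page - 1)) << 9;

	printf("sector %llu -> page %llu, byte offset %u\n",
	       (unsigned long long)sector,
	       (unsigned long long)page_index, offset);
	return 0;
}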
diff --git a/fs/partitions/check.h b/fs/partitions/check.h
deleted file mode 100644
index d68bf4dc3bc2..000000000000
--- a/fs/partitions/check.h
+++ /dev/null
@@ -1,49 +0,0 @@
1#include <linux/pagemap.h>
2#include <linux/blkdev.h>
3#include <linux/genhd.h>
4
5/*
6 * add_gd_partition adds a partition's details to the device's partition
7 * description.
8 */
9struct parsed_partitions {
10 struct block_device *bdev;
11 char name[BDEVNAME_SIZE];
12 struct {
13 sector_t from;
14 sector_t size;
15 int flags;
16 bool has_info;
17 struct partition_meta_info info;
18 } parts[DISK_MAX_PARTS];
19 int next;
20 int limit;
21 bool access_beyond_eod;
22 char *pp_buf;
23};
24
25static inline void *read_part_sector(struct parsed_partitions *state,
26 sector_t n, Sector *p)
27{
28 if (n >= get_capacity(state->bdev->bd_disk)) {
29 state->access_beyond_eod = true;
30 return NULL;
31 }
32 return read_dev_sector(state->bdev, n, p);
33}
34
35static inline void
36put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size)
37{
38 if (n < p->limit) {
39 char tmp[1 + BDEVNAME_SIZE + 10 + 1];
40
41 p->parts[n].from = from;
42 p->parts[n].size = size;
43 snprintf(tmp, sizeof(tmp), " %s%d", p->name, n);
44 strlcat(p->pp_buf, tmp, PAGE_SIZE);
45 }
46}
47
48extern int warn_no_part;
49
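To show how the helpers in this header are meant to be used, here is a hedged skeleton in the shape of the detectors called from check.c: read a sector, validate a magic value, register the slots, release the sector. The on-disk format, magic value and function name are invented for illustration only.

#include "check.h"

/* Hypothetical on-disk layout -- illustrative only, not a real format. */
#define DEMO_MAGIC	0x216f6d65	/* made-up magic */
#define DEMO_NR_PARTS	4

struct demo_table {
	__le32 magic;
	struct {
		__le32 start;	/* first sector */
		__le32 size;	/* sectors, 0 = unused slot */
	} part[DEMO_NR_PARTS];
};

int demo_partition(struct parsed_partitions *state)
{
	const struct demo_table *t;
	Sector sect;
	int i, slot = 1;

	t = read_part_sector(state, 0, &sect);
	if (!t)
		return -1;				/* I/O error */

	if (le32_to_cpu(t->magic) != DEMO_MAGIC) {
		put_dev_sector(sect);
		return 0;				/* not our table */
	}

	strlcat(state->pp_buf, " [demo]", PAGE_SIZE);
	for (i = 0; i < DEMO_NR_PARTS && slot < state->limit; i++) {
		if (!le32_to_cpu(t->part[i].size))
			continue;
		put_partition(state, slot++, le32_to_cpu(t->part[i].start),
			      le32_to_cpu(t->part[i].size));
	}
	put_dev_sector(sect);
	strlcat(state->pp_buf, "\n", PAGE_SIZE);
	return 1;					/* recognised */
}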
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
deleted file mode 100644
index 6296b403c67a..000000000000
--- a/fs/partitions/efi.c
+++ /dev/null
@@ -1,675 +0,0 @@
1/************************************************************
2 * EFI GUID Partition Table handling
3 *
4 * http://www.uefi.org/specs/
5 * http://www.intel.com/technology/efi/
6 *
7 * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com>
8 * Copyright 2000,2001,2002,2004 Dell Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 *
24 *
25 * TODO:
26 *
27 * Changelog:
28 * Mon Nov 09 2004 Matt Domsch <Matt_Domsch@dell.com>
29 * - test for valid PMBR and valid PGPT before ever reading
30 * AGPT, allow override with 'gpt' kernel command line option.
31 * - check for first/last_usable_lba outside of size of disk
32 *
33 * Tue Mar 26 2002 Matt Domsch <Matt_Domsch@dell.com>
34 * - Ported to 2.5.7-pre1 and 2.5.7-dj2
35 * - Applied patch to avoid fault in alternate header handling
36 * - cleaned up find_valid_gpt
37 * - On-disk structure and copy in memory is *always* LE now -
38 * swab fields as needed
39 * - remove print_gpt_header()
40 * - only use first max_p partition entries, to keep the kernel minor number
41 * and partition numbers tied.
42 *
43 * Mon Feb 04 2002 Matt Domsch <Matt_Domsch@dell.com>
44 * - Removed __PRIPTR_PREFIX - not being used
45 *
46 * Mon Jan 14 2002 Matt Domsch <Matt_Domsch@dell.com>
47 * - Ported to 2.5.2-pre11 + library crc32 patch Linus applied
48 *
49 * Thu Dec 6 2001 Matt Domsch <Matt_Domsch@dell.com>
50 * - Added compare_gpts().
51 * - moved le_efi_guid_to_cpus() back into this file. GPT is the only
52 * thing that keeps EFI GUIDs on disk.
53 * - Changed gpt structure names and members to be simpler and more Linux-like.
54 *
55 * Wed Oct 17 2001 Matt Domsch <Matt_Domsch@dell.com>
56 * - Removed CONFIG_DEVFS_VOLUMES_UUID code entirely per Martin Wilck
57 *
58 * Wed Oct 10 2001 Matt Domsch <Matt_Domsch@dell.com>
59 * - Changed function comments to DocBook style per Andreas Dilger suggestion.
60 *
61 * Mon Oct 08 2001 Matt Domsch <Matt_Domsch@dell.com>
62 * - Change read_lba() to use the page cache per Al Viro's work.
63 * - print u64s properly on all architectures
64 * - fixed debug_printk(), now Dprintk()
65 *
66 * Mon Oct 01 2001 Matt Domsch <Matt_Domsch@dell.com>
67 * - Style cleanups
68 * - made most functions static
69 * - Endianness addition
70 * - remove test for second alternate header, as it's not per spec,
71 * and is unnecessary. There's now a method to read/write the last
72 * sector of an odd-sized disk from user space. No tools have ever
73 * been released which used this code, so it's effectively dead.
74 * - Per Asit Mallick of Intel, added a test for a valid PMBR.
75 * - Added kernel command line option 'gpt' to override valid PMBR test.
76 *
77 * Wed Jun 6 2001 Martin Wilck <Martin.Wilck@Fujitsu-Siemens.com>
78 * - added devfs volume UUID support (/dev/volumes/uuids) for
79 * mounting file systems by the partition GUID.
80 *
81 * Tue Dec 5 2000 Matt Domsch <Matt_Domsch@dell.com>
82 * - Moved crc32() to linux/lib, added efi_crc32().
83 *
84 * Thu Nov 30 2000 Matt Domsch <Matt_Domsch@dell.com>
85 * - Replaced Intel's CRC32 function with an equivalent
86 * non-license-restricted version.
87 *
88 * Wed Oct 25 2000 Matt Domsch <Matt_Domsch@dell.com>
89 * - Fixed the last_lba() call to return the proper last block
90 *
91 * Thu Oct 12 2000 Matt Domsch <Matt_Domsch@dell.com>
92 * - Thanks to Andries Brouwer for his debugging assistance.
93 * - Code works, detects all the partitions.
94 *
95 ************************************************************/
96#include <linux/crc32.h>
97#include <linux/ctype.h>
98#include <linux/math64.h>
99#include <linux/slab.h>
100#include "check.h"
101#include "efi.h"
102
103/* This allows a kernel command line option 'gpt' to override
104 * the test for invalid PMBR. Not __initdata because reloading
105 * the partition tables happens after init too.
106 */
107static int force_gpt;
108static int __init
109force_gpt_fn(char *str)
110{
111 force_gpt = 1;
112 return 1;
113}
114__setup("gpt", force_gpt_fn);
115
116
117/**
118 * efi_crc32() - EFI version of crc32 function
119 * @buf: buffer to calculate crc32 of
120 * @len: length of buf
121 *
122 * Description: Returns EFI-style CRC32 value for @buf
123 *
124 * This function uses the little endian Ethernet polynomial
125 * but seeds the function with ~0, and xor's with ~0 at the end.
126 * Note, the EFI Specification, v1.02, has a reference to
127 * Dr. Dobbs Journal, May 1994 (actually it's in May 1992).
128 */
129static inline u32
130efi_crc32(const void *buf, unsigned long len)
131{
132 return (crc32(~0L, buf, len) ^ ~0L);
133}
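The header and entry-array checks later in this file recompute the CRC and compare it with the stored little-endian value. The value efi_crc32() produces should be the standard CRC-32 (reflected polynomial 0xEDB88320, seeded with ~0 and inverted at the end); a slow bitwise reference, which can be sanity-checked against the well-known check value for "123456789", is sketched below.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Bitwise reference for the checksum GPT uses.  Table-driven code is what
 * you would use for real; this is only for checking small buffers. */
static uint32_t gpt_crc32(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t crc = 0xffffffffu;

	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc & 1) ? (crc >> 1) ^ 0xedb88320u
					: crc >> 1;
	}
	return ~crc;
}

int main(void)
{
	/* CRC-32 of "123456789" is the well-known check value 0xCBF43926. */
	printf("0x%08x\n", (unsigned)gpt_crc32("123456789", 9));
	return 0;
}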
134
135/**
136 * last_lba(): return number of last logical block of device
137 * @bdev: block device
138 *
139 * Description: Returns last LBA value on success, 0 on error.
140 * This is stored (by sd and ide-geometry) in
141 * the part[0] entry for this disk, and is the number of
142 * physical sectors available on the disk.
143 */
144static u64 last_lba(struct block_device *bdev)
145{
146 if (!bdev || !bdev->bd_inode)
147 return 0;
148 return div_u64(bdev->bd_inode->i_size,
149 bdev_logical_block_size(bdev)) - 1ULL;
150}
151
152static inline int
153pmbr_part_valid(struct partition *part)
154{
155 if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT &&
156 le32_to_cpu(part->start_sect) == 1UL)
157 return 1;
158 return 0;
159}
160
161/**
162 * is_pmbr_valid(): test Protective MBR for validity
163 * @mbr: pointer to a legacy mbr structure
164 *
165 * Description: Returns 1 if PMBR is valid, 0 otherwise.
166 * Validity depends on two things:
167 * 1) MSDOS signature is in the last two bytes of the MBR
168 * 2) One partition of type 0xEE is found
169 */
170static int
171is_pmbr_valid(legacy_mbr *mbr)
172{
173 int i;
174 if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE)
175 return 0;
176 for (i = 0; i < 4; i++)
177 if (pmbr_part_valid(&mbr->partition_record[i]))
178 return 1;
179 return 0;
180}
181
182/**
183 * read_lba(): Read bytes from disk, starting at given LBA
184 * @state
185 * @lba
186 * @buffer
187 * @count: bytes to read
188 *
189 * Description: Reads @count bytes from @state->bdev into @buffer.
190 * Returns number of bytes read on success, 0 on error.
191 */
192static size_t read_lba(struct parsed_partitions *state,
193 u64 lba, u8 *buffer, size_t count)
194{
195 size_t totalreadcount = 0;
196 struct block_device *bdev = state->bdev;
197 sector_t n = lba * (bdev_logical_block_size(bdev) / 512);
198
199 if (!buffer || lba > last_lba(bdev))
200 return 0;
201
202 while (count) {
203 int copied = 512;
204 Sector sect;
205 unsigned char *data = read_part_sector(state, n++, &sect);
206 if (!data)
207 break;
208 if (copied > count)
209 copied = count;
210 memcpy(buffer, data, copied);
211 put_dev_sector(sect);
212 buffer += copied;
213 totalreadcount +=copied;
214 count -= copied;
215 }
216 return totalreadcount;
217}
218
219/**
220 * alloc_read_gpt_entries(): reads partition entries from disk
221 * @state
222 * @gpt - GPT header
223 *
224 * Description: Returns ptes on success, NULL on error.
225 * Allocates space for PTEs based on information found in @gpt.
226 * Notes: remember to free pte when you're done!
227 */
228static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
229 gpt_header *gpt)
230{
231 size_t count;
232 gpt_entry *pte;
233
234 if (!gpt)
235 return NULL;
236
237 count = le32_to_cpu(gpt->num_partition_entries) *
238 le32_to_cpu(gpt->sizeof_partition_entry);
239 if (!count)
240 return NULL;
241 pte = kzalloc(count, GFP_KERNEL);
242 if (!pte)
243 return NULL;
244
245 if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba),
246 (u8 *) pte,
247 count) < count) {
248 kfree(pte);
249 pte=NULL;
250 return NULL;
251 }
252 return pte;
253}
254
255/**
256 * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
257 * @state
258 * @lba is the Logical Block Address of the partition table
259 *
260 * Description: returns GPT header on success, NULL on error. Allocates
261 * and fills a GPT header starting at @lba from @state->bdev.
262 * Note: remember to free gpt when finished with it.
263 */
264static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
265 u64 lba)
266{
267 gpt_header *gpt;
268 unsigned ssz = bdev_logical_block_size(state->bdev);
269
270 gpt = kzalloc(ssz, GFP_KERNEL);
271 if (!gpt)
272 return NULL;
273
274 if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) {
275 kfree(gpt);
276 gpt=NULL;
277 return NULL;
278 }
279
280 return gpt;
281}
282
283/**
284 * is_gpt_valid() - tests one GPT header and PTEs for validity
285 * @state
286 * @lba is the logical block address of the GPT header to test
287 * @gpt is a GPT header ptr, filled on return.
288 * @ptes is a PTEs ptr, filled on return.
289 *
290 * Description: returns 1 if valid, 0 on error.
291 * If valid, returns pointers to newly allocated GPT header and PTEs.
292 */
293static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
294 gpt_header **gpt, gpt_entry **ptes)
295{
296 u32 crc, origcrc;
297 u64 lastlba;
298
299 if (!ptes)
300 return 0;
301 if (!(*gpt = alloc_read_gpt_header(state, lba)))
302 return 0;
303
304 /* Check the GUID Partition Table signature */
305 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
306		pr_debug("GUID Partition Table Header signature is wrong: "
307 "%lld != %lld\n",
308 (unsigned long long)le64_to_cpu((*gpt)->signature),
309 (unsigned long long)GPT_HEADER_SIGNATURE);
310 goto fail;
311 }
312
313 /* Check the GUID Partition Table header size */
314 if (le32_to_cpu((*gpt)->header_size) >
315 bdev_logical_block_size(state->bdev)) {
316 pr_debug("GUID Partition Table Header size is wrong: %u > %u\n",
317 le32_to_cpu((*gpt)->header_size),
318 bdev_logical_block_size(state->bdev));
319 goto fail;
320 }
321
322 /* Check the GUID Partition Table CRC */
323 origcrc = le32_to_cpu((*gpt)->header_crc32);
324 (*gpt)->header_crc32 = 0;
325 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
326
327 if (crc != origcrc) {
328 pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
329 crc, origcrc);
330 goto fail;
331 }
332 (*gpt)->header_crc32 = cpu_to_le32(origcrc);
333
334 /* Check that the my_lba entry points to the LBA that contains
335 * the GUID Partition Table */
336 if (le64_to_cpu((*gpt)->my_lba) != lba) {
337 pr_debug("GPT my_lba incorrect: %lld != %lld\n",
338 (unsigned long long)le64_to_cpu((*gpt)->my_lba),
339 (unsigned long long)lba);
340 goto fail;
341 }
342
343 /* Check the first_usable_lba and last_usable_lba are
344 * within the disk.
345 */
346 lastlba = last_lba(state->bdev);
347 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
348 pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
349 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
350 (unsigned long long)lastlba);
351 goto fail;
352 }
353 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
354 pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
355 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
356 (unsigned long long)lastlba);
357 goto fail;
358 }
359
360 /* Check that sizeof_partition_entry has the correct value */
361 if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
362		pr_debug("GUID Partition Entry Size check failed.\n");
363 goto fail;
364 }
365
366 if (!(*ptes = alloc_read_gpt_entries(state, *gpt)))
367 goto fail;
368
369 /* Check the GUID Partition Entry Array CRC */
370 crc = efi_crc32((const unsigned char *) (*ptes),
371 le32_to_cpu((*gpt)->num_partition_entries) *
372 le32_to_cpu((*gpt)->sizeof_partition_entry));
373
374 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
375		pr_debug("GUID Partition Entry Array CRC check failed.\n");
376 goto fail_ptes;
377 }
378
379 /* We're done, all's well */
380 return 1;
381
382 fail_ptes:
383 kfree(*ptes);
384 *ptes = NULL;
385 fail:
386 kfree(*gpt);
387 *gpt = NULL;
388 return 0;
389}
390
391/**
392 * is_pte_valid() - tests one PTE for validity
393 * @pte is the pte to check
394 * @lastlba is last lba of the disk
395 *
396 * Description: returns 1 if valid, 0 on error.
397 */
398static inline int
399is_pte_valid(const gpt_entry *pte, const u64 lastlba)
400{
401 if ((!efi_guidcmp(pte->partition_type_guid, NULL_GUID)) ||
402 le64_to_cpu(pte->starting_lba) > lastlba ||
403 le64_to_cpu(pte->ending_lba) > lastlba)
404 return 0;
405 return 1;
406}
407
408/**
409 * compare_gpts() - Check primary and alternate GPT headers for consistency
410 * @pgpt is the primary GPT header
411 * @agpt is the alternate GPT header
412 * @lastlba is the last LBA number
413 * Description: Returns nothing. Sanity checks pgpt and agpt fields
414 * and prints warnings on discrepancies.
415 *
416 */
417static void
418compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
419{
420 int error_found = 0;
421 if (!pgpt || !agpt)
422 return;
423 if (le64_to_cpu(pgpt->my_lba) != le64_to_cpu(agpt->alternate_lba)) {
424 printk(KERN_WARNING
425 "GPT:Primary header LBA != Alt. header alternate_lba\n");
426 printk(KERN_WARNING "GPT:%lld != %lld\n",
427 (unsigned long long)le64_to_cpu(pgpt->my_lba),
428 (unsigned long long)le64_to_cpu(agpt->alternate_lba));
429 error_found++;
430 }
431 if (le64_to_cpu(pgpt->alternate_lba) != le64_to_cpu(agpt->my_lba)) {
432 printk(KERN_WARNING
433 "GPT:Primary header alternate_lba != Alt. header my_lba\n");
434 printk(KERN_WARNING "GPT:%lld != %lld\n",
435 (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
436 (unsigned long long)le64_to_cpu(agpt->my_lba));
437 error_found++;
438 }
439 if (le64_to_cpu(pgpt->first_usable_lba) !=
440 le64_to_cpu(agpt->first_usable_lba)) {
441 printk(KERN_WARNING "GPT:first_usable_lbas don't match.\n");
442 printk(KERN_WARNING "GPT:%lld != %lld\n",
443 (unsigned long long)le64_to_cpu(pgpt->first_usable_lba),
444 (unsigned long long)le64_to_cpu(agpt->first_usable_lba));
445 error_found++;
446 }
447 if (le64_to_cpu(pgpt->last_usable_lba) !=
448 le64_to_cpu(agpt->last_usable_lba)) {
449 printk(KERN_WARNING "GPT:last_usable_lbas don't match.\n");
450 printk(KERN_WARNING "GPT:%lld != %lld\n",
451 (unsigned long long)le64_to_cpu(pgpt->last_usable_lba),
452 (unsigned long long)le64_to_cpu(agpt->last_usable_lba));
453 error_found++;
454 }
455 if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) {
456 printk(KERN_WARNING "GPT:disk_guids don't match.\n");
457 error_found++;
458 }
459 if (le32_to_cpu(pgpt->num_partition_entries) !=
460 le32_to_cpu(agpt->num_partition_entries)) {
461 printk(KERN_WARNING "GPT:num_partition_entries don't match: "
462 "0x%x != 0x%x\n",
463 le32_to_cpu(pgpt->num_partition_entries),
464 le32_to_cpu(agpt->num_partition_entries));
465 error_found++;
466 }
467 if (le32_to_cpu(pgpt->sizeof_partition_entry) !=
468 le32_to_cpu(agpt->sizeof_partition_entry)) {
469 printk(KERN_WARNING
470 "GPT:sizeof_partition_entry values don't match: "
471 "0x%x != 0x%x\n",
472 le32_to_cpu(pgpt->sizeof_partition_entry),
473 le32_to_cpu(agpt->sizeof_partition_entry));
474 error_found++;
475 }
476 if (le32_to_cpu(pgpt->partition_entry_array_crc32) !=
477 le32_to_cpu(agpt->partition_entry_array_crc32)) {
478 printk(KERN_WARNING
479 "GPT:partition_entry_array_crc32 values don't match: "
480 "0x%x != 0x%x\n",
481 le32_to_cpu(pgpt->partition_entry_array_crc32),
482 le32_to_cpu(agpt->partition_entry_array_crc32));
483 error_found++;
484 }
485 if (le64_to_cpu(pgpt->alternate_lba) != lastlba) {
486 printk(KERN_WARNING
487 "GPT:Primary header thinks Alt. header is not at the end of the disk.\n");
488 printk(KERN_WARNING "GPT:%lld != %lld\n",
489 (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
490 (unsigned long long)lastlba);
491 error_found++;
492 }
493
494 if (le64_to_cpu(agpt->my_lba) != lastlba) {
495 printk(KERN_WARNING
496 "GPT:Alternate GPT header not at the end of the disk.\n");
497 printk(KERN_WARNING "GPT:%lld != %lld\n",
498 (unsigned long long)le64_to_cpu(agpt->my_lba),
499 (unsigned long long)lastlba);
500 error_found++;
501 }
502
503 if (error_found)
504 printk(KERN_WARNING
505 "GPT: Use GNU Parted to correct GPT errors.\n");
506 return;
507}
508
509/**
510 * find_valid_gpt() - Search disk for valid GPT headers and PTEs
511 * @state
512 * @gpt is a GPT header ptr, filled on return.
513 * @ptes is a PTEs ptr, filled on return.
514 * Description: Returns 1 if valid, 0 on error.
515 * If valid, returns pointers to newly allocated GPT header and PTEs.
516 * Validity depends on PMBR being valid (or being overridden by the
517 * 'gpt' kernel command line option) and finding either the Primary
518 * GPT header and PTEs valid, or the Alternate GPT header and PTEs
519 * valid. If the Primary GPT header is not valid, the Alternate GPT header
520 * is not checked unless the 'gpt' kernel command line option is passed.
521 * This protects against devices which misreport their size, and forces
522 * the user to decide to use the Alternate GPT.
523 */
524static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
525 gpt_entry **ptes)
526{
527 int good_pgpt = 0, good_agpt = 0, good_pmbr = 0;
528 gpt_header *pgpt = NULL, *agpt = NULL;
529 gpt_entry *pptes = NULL, *aptes = NULL;
530 legacy_mbr *legacymbr;
531 u64 lastlba;
532
533 if (!ptes)
534 return 0;
535
536 lastlba = last_lba(state->bdev);
537 if (!force_gpt) {
538 /* This will be added to the EFI Spec. per Intel after v1.02. */
539 legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
540 if (legacymbr) {
541 read_lba(state, 0, (u8 *) legacymbr,
542 sizeof (*legacymbr));
543 good_pmbr = is_pmbr_valid(legacymbr);
544 kfree(legacymbr);
545 }
546 if (!good_pmbr)
547 goto fail;
548 }
549
550 good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA,
551 &pgpt, &pptes);
552 if (good_pgpt)
553 good_agpt = is_gpt_valid(state,
554 le64_to_cpu(pgpt->alternate_lba),
555 &agpt, &aptes);
556 if (!good_agpt && force_gpt)
557 good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes);
558
559 /* The obviously unsuccessful case */
560 if (!good_pgpt && !good_agpt)
561 goto fail;
562
563 compare_gpts(pgpt, agpt, lastlba);
564
565 /* The good cases */
566 if (good_pgpt) {
567 *gpt = pgpt;
568 *ptes = pptes;
569 kfree(agpt);
570 kfree(aptes);
571 if (!good_agpt) {
572 printk(KERN_WARNING
573 "Alternate GPT is invalid, "
574 "using primary GPT.\n");
575 }
576 return 1;
577 }
578 else if (good_agpt) {
579 *gpt = agpt;
580 *ptes = aptes;
581 kfree(pgpt);
582 kfree(pptes);
583 printk(KERN_WARNING
584 "Primary GPT is invalid, using alternate GPT.\n");
585 return 1;
586 }
587
588 fail:
589 kfree(pgpt);
590 kfree(agpt);
591 kfree(pptes);
592 kfree(aptes);
593 *gpt = NULL;
594 *ptes = NULL;
595 return 0;
596}
597
598/**
599 * efi_partition(struct parsed_partitions *state)
600 * @state
601 *
602 * Description: called from check.c, if the disk contains GPT
603 * partitions, sets up partition entries in the kernel.
604 *
605 * If the first block on the disk is a legacy MBR,
606 * it will get handled by msdos_partition().
607 * If it's a Protective MBR, we'll handle it here.
608 *
609 * We do not create a Linux partition for GPT, but
610 * only for the actual data partitions.
611 * Returns:
612 * -1 if unable to read the partition table
613 * 0 if this isn't our partition table
614 * 1 if successful
615 *
616 */
617int efi_partition(struct parsed_partitions *state)
618{
619 gpt_header *gpt = NULL;
620 gpt_entry *ptes = NULL;
621 u32 i;
622 unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
623 u8 unparsed_guid[37];
624
625 if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
626 kfree(gpt);
627 kfree(ptes);
628 return 0;
629 }
630
631 pr_debug("GUID Partition Table is valid! Yea!\n");
632
633 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
634 struct partition_meta_info *info;
635 unsigned label_count = 0;
636 unsigned label_max;
637 u64 start = le64_to_cpu(ptes[i].starting_lba);
638 u64 size = le64_to_cpu(ptes[i].ending_lba) -
639 le64_to_cpu(ptes[i].starting_lba) + 1ULL;
640
641 if (!is_pte_valid(&ptes[i], last_lba(state->bdev)))
642 continue;
643
644 put_partition(state, i+1, start * ssz, size * ssz);
645
646 /* If this is a RAID volume, tell md */
647 if (!efi_guidcmp(ptes[i].partition_type_guid,
648 PARTITION_LINUX_RAID_GUID))
649 state->parts[i + 1].flags = ADDPART_FLAG_RAID;
650
651 info = &state->parts[i + 1].info;
652 /* Instead of doing a manual swap to big endian, reuse the
653 * common ASCII hex format as the interim.
654 */
655 efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
656 part_pack_uuid(unparsed_guid, info->uuid);
657
658 /* Naively convert UTF16-LE to 7 bits. */
659 label_max = min(sizeof(info->volname) - 1,
660 sizeof(ptes[i].partition_name));
661 info->volname[label_max] = 0;
662 while (label_count < label_max) {
663 u8 c = ptes[i].partition_name[label_count] & 0xff;
664 if (c && !isprint(c))
665 c = '!';
666 info->volname[label_count] = c;
667 label_count++;
668 }
669 state->parts[i + 1].has_info = true;
670 }
671 kfree(ptes);
672 kfree(gpt);
673 strlcat(state->pp_buf, "\n", PAGE_SIZE);
674 return 1;
675}
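The label handling above keeps only the low byte of each UTF-16LE code unit and replaces unprintable bytes with '!'. A standalone variant of that conversion (it additionally stops at the first NUL, and assumes the code units are already in host order) looks like this.

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>

/* Naive UTF-16 -> ASCII label copy in the spirit of the loop above.
 * 'dst' must hold at least maxlen + 1 bytes. */
static void gpt_label_to_ascii(char *dst, const uint16_t *name16,
			       size_t maxlen)
{
	size_t i;

	for (i = 0; i < maxlen; i++) {
		uint8_t c = name16[i] & 0xff;

		if (!c)
			break;
		dst[i] = isprint(c) ? c : '!';
	}
	dst[i] = '\0';
}

int main(void)
{
	const uint16_t name[] = { 'b', 'o', 'o', 't', 0x0101, 0 };
	char out[8];

	gpt_label_to_ascii(out, name, sizeof(out) - 1);
	printf("%s\n", out);		/* "boot!" (0x0101 -> '!') */
	return 0;
}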
diff --git a/fs/partitions/efi.h b/fs/partitions/efi.h
deleted file mode 100644
index b69ab729558f..000000000000
--- a/fs/partitions/efi.h
+++ /dev/null
@@ -1,134 +0,0 @@
1/************************************************************
2 * EFI GUID Partition Table
3 * Per Intel EFI Specification v1.02
4 * http://developer.intel.com/technology/efi/efi.htm
5 *
6 * By Matt Domsch <Matt_Domsch@dell.com> Fri Sep 22 22:15:56 CDT 2000
7 * Copyright 2000,2001 Dell Inc.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 ************************************************************/
24
25#ifndef FS_PART_EFI_H_INCLUDED
26#define FS_PART_EFI_H_INCLUDED
27
28#include <linux/types.h>
29#include <linux/fs.h>
30#include <linux/genhd.h>
31#include <linux/kernel.h>
32#include <linux/major.h>
33#include <linux/string.h>
34#include <linux/efi.h>
35
36#define MSDOS_MBR_SIGNATURE 0xaa55
37#define EFI_PMBR_OSTYPE_EFI 0xEF
38#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE
39
40#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
41#define GPT_HEADER_REVISION_V1 0x00010000
42#define GPT_PRIMARY_PARTITION_TABLE_LBA 1
43
44#define PARTITION_SYSTEM_GUID \
45 EFI_GUID( 0xC12A7328, 0xF81F, 0x11d2, \
46 0xBA, 0x4B, 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B)
47#define LEGACY_MBR_PARTITION_GUID \
48 EFI_GUID( 0x024DEE41, 0x33E7, 0x11d3, \
49 0x9D, 0x69, 0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F)
50#define PARTITION_MSFT_RESERVED_GUID \
51 EFI_GUID( 0xE3C9E316, 0x0B5C, 0x4DB8, \
52 0x81, 0x7D, 0xF9, 0x2D, 0xF0, 0x02, 0x15, 0xAE)
53#define PARTITION_BASIC_DATA_GUID \
54 EFI_GUID( 0xEBD0A0A2, 0xB9E5, 0x4433, \
55 0x87, 0xC0, 0x68, 0xB6, 0xB7, 0x26, 0x99, 0xC7)
56#define PARTITION_LINUX_RAID_GUID \
57 EFI_GUID( 0xa19d880f, 0x05fc, 0x4d3b, \
58 0xa0, 0x06, 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e)
59#define PARTITION_LINUX_SWAP_GUID \
60 EFI_GUID( 0x0657fd6d, 0xa4ab, 0x43c4, \
61 0x84, 0xe5, 0x09, 0x33, 0xc8, 0x4b, 0x4f, 0x4f)
62#define PARTITION_LINUX_LVM_GUID \
63 EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \
64 0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28)
65
66typedef struct _gpt_header {
67 __le64 signature;
68 __le32 revision;
69 __le32 header_size;
70 __le32 header_crc32;
71 __le32 reserved1;
72 __le64 my_lba;
73 __le64 alternate_lba;
74 __le64 first_usable_lba;
75 __le64 last_usable_lba;
76 efi_guid_t disk_guid;
77 __le64 partition_entry_lba;
78 __le32 num_partition_entries;
79 __le32 sizeof_partition_entry;
80 __le32 partition_entry_array_crc32;
81
82 /* The rest of the logical block is reserved by UEFI and must be zero.
83 * EFI standard handles this by:
84 *
85 * uint8_t reserved2[ BlockSize - 92 ];
86 */
87} __attribute__ ((packed)) gpt_header;
88
89typedef struct _gpt_entry_attributes {
90 u64 required_to_function:1;
91 u64 reserved:47;
92 u64 type_guid_specific:16;
93} __attribute__ ((packed)) gpt_entry_attributes;
94
95typedef struct _gpt_entry {
96 efi_guid_t partition_type_guid;
97 efi_guid_t unique_partition_guid;
98 __le64 starting_lba;
99 __le64 ending_lba;
100 gpt_entry_attributes attributes;
101 efi_char16_t partition_name[72 / sizeof (efi_char16_t)];
102} __attribute__ ((packed)) gpt_entry;
103
104typedef struct _legacy_mbr {
105 u8 boot_code[440];
106 __le32 unique_mbr_signature;
107 __le16 unknown;
108 struct partition partition_record[4];
109 __le16 signature;
110} __attribute__ ((packed)) legacy_mbr;
111
112/* Functions */
113extern int efi_partition(struct parsed_partitions *state);
114
115#endif
116
117/*
118 * Overrides for Emacs so that we follow Linus's tabbing style.
119 * Emacs will notice this stuff at the end of the file and automatically
120 * adjust the settings for this buffer only. This must remain at the end
121 * of the file.
122 * --------------------------------------------------------------------------
123 * Local variables:
124 * c-indent-level: 4
125 * c-brace-imaginary-offset: 0
126 * c-brace-offset: -4
127 * c-argdecl-indent: 4
128 * c-label-offset: -4
129 * c-continued-statement-offset: 4
130 * c-continued-brace-offset: 0
131 * indent-tabs-mode: nil
132 * tab-width: 8
133 * End:
134 */
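For illustration, a minimal sketch (not taken from the kernel sources) of how the gpt_header fields defined above might be sanity-checked. The helper name gpt_signature_valid() is hypothetical, and the sketch assumes buf already holds at least sizeof(gpt_header) bytes of the header block; a full parser would additionally verify header_crc32 and the partition entry array CRC.

static bool gpt_signature_valid(const u8 *buf)
{
	const gpt_header *gpt = (const gpt_header *)buf;

	/* "EFI PART" magic, stored little-endian on disk */
	if (le64_to_cpu(gpt->signature) != GPT_HEADER_SIGNATURE)
		return false;
	/* revision must be at least 1.0 */
	if (le32_to_cpu(gpt->revision) < GPT_HEADER_REVISION_V1)
		return false;
	return true;
}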
diff --git a/fs/partitions/ibm.c b/fs/partitions/ibm.c
deleted file mode 100644
index d513a07f44bb..000000000000
--- a/fs/partitions/ibm.c
+++ /dev/null
@@ -1,275 +0,0 @@
1/*
2 * File...........: linux/fs/partitions/ibm.c
3 * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
4 * Volker Sameske <sameske@de.ibm.com>
5 * Bugreports.to..: <Linux390@de.ibm.com>
6 * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000
7 */
8
9#include <linux/buffer_head.h>
10#include <linux/hdreg.h>
11#include <linux/slab.h>
12#include <asm/dasd.h>
13#include <asm/ebcdic.h>
14#include <asm/uaccess.h>
15#include <asm/vtoc.h>
16
17#include "check.h"
18#include "ibm.h"
19
20/*
21 * compute the block number from a
22 * cyl-cyl-head-head structure
23 */
24static sector_t
25cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) {
26
27 sector_t cyl;
28 __u16 head;
29
30	/* decode cylinder and heads for large volumes */
31 cyl = ptr->hh & 0xFFF0;
32 cyl <<= 12;
33 cyl |= ptr->cc;
34 head = ptr->hh & 0x000F;
35 return cyl * geo->heads * geo->sectors +
36 head * geo->sectors;
37}
38
39/*
40 * compute the block number from a
41 * cyl-cyl-head-head-block structure
42 */
43static sector_t
44cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) {
45
46 sector_t cyl;
47 __u16 head;
48
49	/* decode cylinder and heads for large volumes */
50 cyl = ptr->hh & 0xFFF0;
51 cyl <<= 12;
52 cyl |= ptr->cc;
53 head = ptr->hh & 0x000F;
54 return cyl * geo->heads * geo->sectors +
55 head * geo->sectors +
56 ptr->b;
57}
58
59/*
60 */
61int ibm_partition(struct parsed_partitions *state)
62{
63 struct block_device *bdev = state->bdev;
64 int blocksize, res;
65 loff_t i_size, offset, size, fmt_size;
66 dasd_information2_t *info;
67 struct hd_geometry *geo;
68 char type[5] = {0,};
69 char name[7] = {0,};
70 union label_t {
71 struct vtoc_volume_label_cdl vol;
72 struct vtoc_volume_label_ldl lnx;
73 struct vtoc_cms_label cms;
74 } *label;
75 unsigned char *data;
76 Sector sect;
77 sector_t labelsect;
78 char tmp[64];
79
80 res = 0;
81 blocksize = bdev_logical_block_size(bdev);
82 if (blocksize <= 0)
83 goto out_exit;
84 i_size = i_size_read(bdev->bd_inode);
85 if (i_size == 0)
86 goto out_exit;
87
88 info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL);
89 if (info == NULL)
90 goto out_exit;
91 geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL);
92 if (geo == NULL)
93 goto out_nogeo;
94 label = kmalloc(sizeof(union label_t), GFP_KERNEL);
95 if (label == NULL)
96 goto out_nolab;
97
98 if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 ||
99 ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0)
100 goto out_freeall;
101
102 /*
103 * Special case for FBA disks: label sector does not depend on
104 * blocksize.
105 */
106 if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) ||
107 (info->cu_type == 0x3880 && info->dev_type == 0x3370))
108 labelsect = info->label_block;
109 else
110 labelsect = info->label_block * (blocksize >> 9);
111
112 /*
113 * Get volume label, extract name and type.
114 */
115 data = read_part_sector(state, labelsect, &sect);
116 if (data == NULL)
117 goto out_readerr;
118
119 memcpy(label, data, sizeof(union label_t));
120 put_dev_sector(sect);
121
122 if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) {
123 strncpy(type, label->vol.vollbl, 4);
124 strncpy(name, label->vol.volid, 6);
125 } else {
126 strncpy(type, label->lnx.vollbl, 4);
127 strncpy(name, label->lnx.volid, 6);
128 }
129 EBCASC(type, 4);
130 EBCASC(name, 6);
131
132 res = 1;
133
134 /*
135	 * Three different formats: LDL, CDL and unformatted disk
136 *
137 * identified by info->format
138 *
139	 * unformatted disks we do not have to care about
140 */
141 if (info->format == DASD_FORMAT_LDL) {
142 if (strncmp(type, "CMS1", 4) == 0) {
143 /*
144 * VM style CMS1 labeled disk
145 */
146 blocksize = label->cms.block_size;
147 if (label->cms.disk_offset != 0) {
148 snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name);
149 strlcat(state->pp_buf, tmp, PAGE_SIZE);
150 /* disk is reserved minidisk */
151 offset = label->cms.disk_offset;
152 size = (label->cms.block_count - 1)
153 * (blocksize >> 9);
154 } else {
155 snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name);
156 strlcat(state->pp_buf, tmp, PAGE_SIZE);
157 offset = (info->label_block + 1);
158 size = label->cms.block_count
159 * (blocksize >> 9);
160 }
161 put_partition(state, 1, offset*(blocksize >> 9),
162 size-offset*(blocksize >> 9));
163 } else {
164 if (strncmp(type, "LNX1", 4) == 0) {
165 snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name);
166 strlcat(state->pp_buf, tmp, PAGE_SIZE);
167 if (label->lnx.ldl_version == 0xf2) {
168 fmt_size = label->lnx.formatted_blocks
169 * (blocksize >> 9);
170 } else if (!strcmp(info->type, "ECKD")) {
171					/* formatted w/o large volume support */
172 fmt_size = geo->cylinders * geo->heads
173 * geo->sectors * (blocksize >> 9);
174 } else {
175 /* old label and no usable disk geometry
176 * (e.g. DIAG) */
177 fmt_size = i_size >> 9;
178 }
179 size = i_size >> 9;
180 if (fmt_size < size)
181 size = fmt_size;
182 offset = (info->label_block + 1);
183 } else {
184 /* unlabeled disk */
185 strlcat(state->pp_buf, "(nonl)", PAGE_SIZE);
186 size = i_size >> 9;
187 offset = (info->label_block + 1);
188 }
189 put_partition(state, 1, offset*(blocksize >> 9),
190 size-offset*(blocksize >> 9));
191 }
192 } else if (info->format == DASD_FORMAT_CDL) {
193 /*
194 * New style CDL formatted disk
195 */
196 sector_t blk;
197 int counter;
198
199 /*
200 * check if VOL1 label is available
201 * if not, something is wrong, skipping partition detection
202 */
203 if (strncmp(type, "VOL1", 4) == 0) {
204 snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name);
205 strlcat(state->pp_buf, tmp, PAGE_SIZE);
206 /*
207 * get block number and read then go through format1
208 * labels
209 */
210 blk = cchhb2blk(&label->vol.vtoc, geo) + 1;
211 counter = 0;
212 data = read_part_sector(state, blk * (blocksize/512),
213 &sect);
214 while (data != NULL) {
215 struct vtoc_format1_label f1;
216
217 memcpy(&f1, data,
218 sizeof(struct vtoc_format1_label));
219 put_dev_sector(sect);
220
221				/* skip FMT4 / FMT5 / FMT7 / FMT9 labels */
222 if (f1.DS1FMTID == _ascebc['4']
223 || f1.DS1FMTID == _ascebc['5']
224 || f1.DS1FMTID == _ascebc['7']
225 || f1.DS1FMTID == _ascebc['9']) {
226 blk++;
227 data = read_part_sector(state,
228 blk * (blocksize/512), &sect);
229 continue;
230 }
231
232 /* only FMT1 and 8 labels valid at this point */
233 if (f1.DS1FMTID != _ascebc['1'] &&
234 f1.DS1FMTID != _ascebc['8'])
235 break;
236
237 /* OK, we got valid partition data */
238 offset = cchh2blk(&f1.DS1EXT1.llimit, geo);
239 size = cchh2blk(&f1.DS1EXT1.ulimit, geo) -
240 offset + geo->sectors;
241 if (counter >= state->limit)
242 break;
243 put_partition(state, counter + 1,
244 offset * (blocksize >> 9),
245 size * (blocksize >> 9));
246 counter++;
247 blk++;
248 data = read_part_sector(state,
249 blk * (blocksize/512), &sect);
250 }
251
252 if (!data)
253 /* Are we not supposed to report this ? */
254 goto out_readerr;
255 } else
256 printk(KERN_WARNING "Warning, expected Label VOL1 not "
257			       "found, treating as CDL formatted Disk");
258
259 }
260
261 strlcat(state->pp_buf, "\n", PAGE_SIZE);
262 goto out_freeall;
263
264
265out_readerr:
266 res = -1;
267out_freeall:
268 kfree(label);
269out_nolab:
270 kfree(geo);
271out_nogeo:
272 kfree(info);
273out_exit:
274 return res;
275}
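To make the cylinder/head packing used by cchh2blk() above concrete, here is a small worked example with made-up values (illustration only, not part of the driver):

/*
 * With cc = 0x1234 and hh = 0x5678:
 *
 *	cyl  = ((hh & 0xFFF0) << 12) | cc = (0x5670 << 12) | 0x1234 = 0x5671234
 *	head =   hh & 0x000F              = 8
 *
 * so for a geometry of, say, 15 heads and 12 sectors per track the label
 * block starts at cyl * 15 * 12 + head * 12 sectors.
 */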
diff --git a/fs/partitions/ibm.h b/fs/partitions/ibm.h
deleted file mode 100644
index 08fb0804a812..000000000000
--- a/fs/partitions/ibm.h
+++ /dev/null
@@ -1 +0,0 @@
1int ibm_partition(struct parsed_partitions *);
diff --git a/fs/partitions/karma.c b/fs/partitions/karma.c
deleted file mode 100644
index 0ea19312706b..000000000000
--- a/fs/partitions/karma.c
+++ /dev/null
@@ -1,57 +0,0 @@
1/*
2 * fs/partitions/karma.c
3 * Rio Karma partition info.
4 *
5 * Copyright (C) 2006 Bob Copeland (me@bobcopeland.com)
6 * based on osf.c
7 */
8
9#include "check.h"
10#include "karma.h"
11
12int karma_partition(struct parsed_partitions *state)
13{
14 int i;
15 int slot = 1;
16 Sector sect;
17 unsigned char *data;
18 struct disklabel {
19 u8 d_reserved[270];
20 struct d_partition {
21 __le32 p_res;
22 u8 p_fstype;
23 u8 p_res2[3];
24 __le32 p_offset;
25 __le32 p_size;
26 } d_partitions[2];
27 u8 d_blank[208];
28 __le16 d_magic;
29 } __attribute__((packed)) *label;
30 struct d_partition *p;
31
32 data = read_part_sector(state, 0, &sect);
33 if (!data)
34 return -1;
35
36 label = (struct disklabel *)data;
37 if (le16_to_cpu(label->d_magic) != KARMA_LABEL_MAGIC) {
38 put_dev_sector(sect);
39 return 0;
40 }
41
42 p = label->d_partitions;
43 for (i = 0 ; i < 2; i++, p++) {
44 if (slot == state->limit)
45 break;
46
47 if (p->p_fstype == 0x4d && le32_to_cpu(p->p_size)) {
48 put_partition(state, slot, le32_to_cpu(p->p_offset),
49 le32_to_cpu(p->p_size));
50 }
51 slot++;
52 }
53 strlcat(state->pp_buf, "\n", PAGE_SIZE);
54 put_dev_sector(sect);
55 return 1;
56}
57
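As a quick cross-check of the packed struct disklabel layout parsed above (illustrative arithmetic only): 270 (d_reserved) + 2 * 16 (each d_partition is 4 + 1 + 3 + 4 + 4 bytes) + 208 (d_blank) + 2 (d_magic) = 512 bytes, so the label fills exactly the one sector read at offset 0.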
diff --git a/fs/partitions/karma.h b/fs/partitions/karma.h
deleted file mode 100644
index c764b2e9df21..000000000000
--- a/fs/partitions/karma.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/karma.h
3 */
4
5#define KARMA_LABEL_MAGIC 0xAB56
6
7int karma_partition(struct parsed_partitions *state);
8
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
deleted file mode 100644
index bd8ae788f689..000000000000
--- a/fs/partitions/ldm.c
+++ /dev/null
@@ -1,1570 +0,0 @@
1/**
2 * ldm - Support for Windows Logical Disk Manager (Dynamic Disks)
3 *
4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
7 *
8 * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads
9 *
10 * This program is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU General Public License as published by the Free Software
12 * Foundation; either version 2 of the License, or (at your option) any later
13 * version.
14 *
15 * This program is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License along with
21 * this program (in the main directory of the source in the file COPYING); if
22 * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
23 * Boston, MA 02111-1307 USA
24 */
25
26#include <linux/slab.h>
27#include <linux/pagemap.h>
28#include <linux/stringify.h>
29#include <linux/kernel.h>
30#include "ldm.h"
31#include "check.h"
32#include "msdos.h"
33
34/**
35 * ldm_debug/info/error/crit - Output an error message
36 * @f: A printf format string containing the message
37 * @...: Variables to substitute into @f
38 *
39 * ldm_debug() writes a DEBUG level message to the syslog but only if the
40 * driver was compiled with debug enabled. Otherwise, the call turns into a NOP.
41 */
42#ifndef CONFIG_LDM_DEBUG
43#define ldm_debug(...) do {} while (0)
44#else
45#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
46#endif
47
48#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
49#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
50#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
51
52static __printf(3, 4)
53void _ldm_printk(const char *level, const char *function, const char *fmt, ...)
54{
55 struct va_format vaf;
56 va_list args;
57
58 va_start (args, fmt);
59
60 vaf.fmt = fmt;
61 vaf.va = &args;
62
63 printk("%s%s(): %pV\n", level, function, &vaf);
64
65 va_end(args);
66}
67
68/**
69 * ldm_parse_hexbyte - Convert an ASCII hex number to a byte
70 * @src: Pointer to at least 2 characters to convert.
71 *
72 * Convert a two character ASCII hex string to a number.
73 *
74 * Return: 0-255 Success, the byte was parsed correctly
75 * -1 Error, an invalid character was supplied
76 */
77static int ldm_parse_hexbyte (const u8 *src)
78{
79 unsigned int x; /* For correct wrapping */
80 int h;
81
82 /* high part */
83 x = h = hex_to_bin(src[0]);
84 if (h < 0)
85 return -1;
86
87 /* low part */
88 h = hex_to_bin(src[1]);
89 if (h < 0)
90 return -1;
91
92 return (x << 4) + h;
93}
94
95/**
96 * ldm_parse_guid - Convert GUID from ASCII to binary
97 * @src: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
98 * @dest: Memory block to hold binary GUID (16 bytes)
99 *
100 * N.B. The GUID need not be NULL terminated.
101 *
102 * Return: 'true' @dest contains binary GUID
103 * 'false' @dest contents are undefined
104 */
105static bool ldm_parse_guid (const u8 *src, u8 *dest)
106{
107 static const int size[] = { 4, 2, 2, 2, 6 };
108 int i, j, v;
109
110 if (src[8] != '-' || src[13] != '-' ||
111 src[18] != '-' || src[23] != '-')
112 return false;
113
114 for (j = 0; j < 5; j++, src++)
115 for (i = 0; i < size[j]; i++, src+=2, *dest++ = v)
116 if ((v = ldm_parse_hexbyte (src)) < 0)
117 return false;
118
119 return true;
120}
121
122/**
123 * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure
124 * @data: Raw database PRIVHEAD structure loaded from the device
125 * @ph: In-memory privhead structure in which to return parsed information
126 *
127 * This parses the LDM database PRIVHEAD structure supplied in @data and
128 * sets up the in-memory privhead structure @ph with the obtained information.
129 *
130 * Return: 'true' @ph contains the PRIVHEAD data
131 * 'false' @ph contents are undefined
132 */
133static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
134{
135 bool is_vista = false;
136
137 BUG_ON(!data || !ph);
138 if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
139 ldm_error("Cannot find PRIVHEAD structure. LDM database is"
140 " corrupt. Aborting.");
141 return false;
142 }
143 ph->ver_major = get_unaligned_be16(data + 0x000C);
144 ph->ver_minor = get_unaligned_be16(data + 0x000E);
145 ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
146 ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
147 ph->config_start = get_unaligned_be64(data + 0x012B);
148 ph->config_size = get_unaligned_be64(data + 0x0133);
149 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
150 if (ph->ver_major == 2 && ph->ver_minor == 12)
151 is_vista = true;
152 if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) {
153 ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d."
154 " Aborting.", ph->ver_major, ph->ver_minor);
155 return false;
156 }
157 ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major,
158 ph->ver_minor, is_vista ? "Vista" : "2000/XP");
159 if (ph->config_size != LDM_DB_SIZE) { /* 1 MiB in sectors. */
160 /* Warn the user and continue, carefully. */
161 ldm_info("Database is normally %u bytes, it claims to "
162 "be %llu bytes.", LDM_DB_SIZE,
163 (unsigned long long)ph->config_size);
164 }
165 if ((ph->logical_disk_size == 0) || (ph->logical_disk_start +
166 ph->logical_disk_size > ph->config_start)) {
167 ldm_error("PRIVHEAD disk size doesn't match real disk size");
168 return false;
169 }
170 if (!ldm_parse_guid(data + 0x0030, ph->disk_id)) {
171 ldm_error("PRIVHEAD contains an invalid GUID.");
172 return false;
173 }
174 ldm_debug("Parsed PRIVHEAD successfully.");
175 return true;
176}
177
178/**
179 * ldm_parse_tocblock - Read the LDM Database TOCBLOCK structure
180 * @data: Raw database TOCBLOCK structure loaded from the device
181 * @toc: In-memory toc structure in which to return parsed information
182 *
183 * This parses the LDM Database TOCBLOCK (table of contents) structure supplied
184 * in @data and sets up the in-memory tocblock structure @toc with the obtained
185 * information.
186 *
187 * N.B. The *_start and *_size values returned in @toc are not range-checked.
188 *
189 * Return: 'true' @toc contains the TOCBLOCK data
190 * 'false' @toc contents are undefined
191 */
192static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
193{
194 BUG_ON (!data || !toc);
195
196 if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
197 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
198 return false;
199 }
200 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
201 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
202 toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
203 toc->bitmap1_size = get_unaligned_be64(data + 0x36);
204
205 if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
206 sizeof (toc->bitmap1_name)) != 0) {
207 ldm_crit ("TOCBLOCK's first bitmap is '%s', should be '%s'.",
208 TOC_BITMAP1, toc->bitmap1_name);
209 return false;
210 }
211 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
212 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
213 toc->bitmap2_start = get_unaligned_be64(data + 0x50);
214 toc->bitmap2_size = get_unaligned_be64(data + 0x58);
215 if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
216 sizeof (toc->bitmap2_name)) != 0) {
217 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
218 TOC_BITMAP2, toc->bitmap2_name);
219 return false;
220 }
221 ldm_debug ("Parsed TOCBLOCK successfully.");
222 return true;
223}
224
225/**
226 * ldm_parse_vmdb - Read the LDM Database VMDB structure
227 * @data: Raw database VMDB structure loaded from the device
228 * @vm: In-memory vmdb structure in which to return parsed information
229 *
230 * This parses the LDM Database VMDB structure supplied in @data and sets up
231 * the in-memory vmdb structure @vm with the obtained information.
232 *
233 * N.B. The *_start, *_size and *_seq values will be range-checked later.
234 *
235 * Return: 'true' @vm contains VMDB info
236 * 'false' @vm contents are undefined
237 */
238static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
239{
240 BUG_ON (!data || !vm);
241
242 if (MAGIC_VMDB != get_unaligned_be32(data)) {
243 ldm_crit ("Cannot find the VMDB, database may be corrupt.");
244 return false;
245 }
246
247 vm->ver_major = get_unaligned_be16(data + 0x12);
248 vm->ver_minor = get_unaligned_be16(data + 0x14);
249 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
250 ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
251 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
252 return false;
253 }
254
255 vm->vblk_size = get_unaligned_be32(data + 0x08);
256 if (vm->vblk_size == 0) {
257 ldm_error ("Illegal VBLK size");
258 return false;
259 }
260
261 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
262 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
263
264 ldm_debug ("Parsed VMDB successfully.");
265 return true;
266}
267
268/**
269 * ldm_compare_privheads - Compare two privhead objects
270 * @ph1: First privhead
271 * @ph2: Second privhead
272 *
273 * This compares the two privhead structures @ph1 and @ph2.
274 *
275 * Return: 'true' Identical
276 * 'false' Different
277 */
278static bool ldm_compare_privheads (const struct privhead *ph1,
279 const struct privhead *ph2)
280{
281 BUG_ON (!ph1 || !ph2);
282
283 return ((ph1->ver_major == ph2->ver_major) &&
284 (ph1->ver_minor == ph2->ver_minor) &&
285 (ph1->logical_disk_start == ph2->logical_disk_start) &&
286 (ph1->logical_disk_size == ph2->logical_disk_size) &&
287 (ph1->config_start == ph2->config_start) &&
288 (ph1->config_size == ph2->config_size) &&
289 !memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE));
290}
291
292/**
293 * ldm_compare_tocblocks - Compare two tocblock objects
294 * @toc1: First toc
295 * @toc2: Second toc
296 *
297 * This compares the two tocblock structures @toc1 and @toc2.
298 *
299 * Return: 'true' Identical
300 * 'false' Different
301 */
302static bool ldm_compare_tocblocks (const struct tocblock *toc1,
303 const struct tocblock *toc2)
304{
305 BUG_ON (!toc1 || !toc2);
306
307 return ((toc1->bitmap1_start == toc2->bitmap1_start) &&
308 (toc1->bitmap1_size == toc2->bitmap1_size) &&
309 (toc1->bitmap2_start == toc2->bitmap2_start) &&
310 (toc1->bitmap2_size == toc2->bitmap2_size) &&
311 !strncmp (toc1->bitmap1_name, toc2->bitmap1_name,
312 sizeof (toc1->bitmap1_name)) &&
313 !strncmp (toc1->bitmap2_name, toc2->bitmap2_name,
314 sizeof (toc1->bitmap2_name)));
315}
316
317/**
318 * ldm_validate_privheads - Compare the primary privhead with its backups
319 * @state: Partition check state including device holding the LDM Database
320 * @ph1: Memory struct to fill with ph contents
321 *
322 * Read and compare all three privheads from disk.
323 *
324 * The privheads on disk show the size and location of the main disk area and
325 * the configuration area (the database). The values are range-checked against
326 * the real size of the disk obtained from @state->bdev.
327 *
328 * Return: 'true' Success
329 * 'false' Error
330 */
331static bool ldm_validate_privheads(struct parsed_partitions *state,
332 struct privhead *ph1)
333{
334 static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 };
335 struct privhead *ph[3] = { ph1 };
336 Sector sect;
337 u8 *data;
338 bool result = false;
339 long num_sects;
340 int i;
341
342 BUG_ON (!state || !ph1);
343
344 ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL);
345 ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL);
346 if (!ph[1] || !ph[2]) {
347 ldm_crit ("Out of memory.");
348 goto out;
349 }
350
351 /* off[1 & 2] are relative to ph[0]->config_start */
352 ph[0]->config_start = 0;
353
354 /* Read and parse privheads */
355 for (i = 0; i < 3; i++) {
356 data = read_part_sector(state, ph[0]->config_start + off[i],
357 &sect);
358 if (!data) {
359 ldm_crit ("Disk read failed.");
360 goto out;
361 }
362 result = ldm_parse_privhead (data, ph[i]);
363 put_dev_sector (sect);
364 if (!result) {
365 ldm_error ("Cannot find PRIVHEAD %d.", i+1); /* Log again */
366 if (i < 2)
367 goto out; /* Already logged */
368 else
369 break; /* FIXME ignore for now, 3rd PH can fail on odd-sized disks */
370 }
371 }
372
373 num_sects = state->bdev->bd_inode->i_size >> 9;
374
375 if ((ph[0]->config_start > num_sects) ||
376 ((ph[0]->config_start + ph[0]->config_size) > num_sects)) {
377 ldm_crit ("Database extends beyond the end of the disk.");
378 goto out;
379 }
380
381 if ((ph[0]->logical_disk_start > ph[0]->config_start) ||
382 ((ph[0]->logical_disk_start + ph[0]->logical_disk_size)
383 > ph[0]->config_start)) {
384 ldm_crit ("Disk and database overlap.");
385 goto out;
386 }
387
388 if (!ldm_compare_privheads (ph[0], ph[1])) {
389 ldm_crit ("Primary and backup PRIVHEADs don't match.");
390 goto out;
391 }
392 /* FIXME ignore this for now
393 if (!ldm_compare_privheads (ph[0], ph[2])) {
394 ldm_crit ("Primary and backup PRIVHEADs don't match.");
395 goto out;
396 }*/
397 ldm_debug ("Validated PRIVHEADs successfully.");
398 result = true;
399out:
400 kfree (ph[1]);
401 kfree (ph[2]);
402 return result;
403}
404
405/**
406 * ldm_validate_tocblocks - Validate the table of contents and its backups
407 * @state: Partition check state including device holding the LDM Database
408 * @base: Offset, into @state->bdev, of the database
409 * @ldb: Cache of the database structures
410 *
411 * Find and compare the four tables of contents of the LDM Database stored on
412 * @state->bdev and return the parsed information into @ldb->toc.
413 *
414 * The offsets and sizes of the configs are range-checked against a privhead.
415 *
416 * Return: 'true' @ldb->toc contains validated TOCBLOCK info
417 * 'false' @ldb->toc contents are undefined
418 */
419static bool ldm_validate_tocblocks(struct parsed_partitions *state,
420 unsigned long base, struct ldmdb *ldb)
421{
422 static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4};
423 struct tocblock *tb[4];
424 struct privhead *ph;
425 Sector sect;
426 u8 *data;
427 int i, nr_tbs;
428 bool result = false;
429
430 BUG_ON(!state || !ldb);
431 ph = &ldb->ph;
432 tb[0] = &ldb->toc;
433 tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL);
434 if (!tb[1]) {
435 ldm_crit("Out of memory.");
436 goto err;
437 }
438 tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1]));
439 tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2]));
440 /*
441 * Try to read and parse all four TOCBLOCKs.
442 *
443 * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so
444 * skip any that fail as long as we get at least one valid TOCBLOCK.
445 */
446 for (nr_tbs = i = 0; i < 4; i++) {
447 data = read_part_sector(state, base + off[i], &sect);
448 if (!data) {
449 ldm_error("Disk read failed for TOCBLOCK %d.", i);
450 continue;
451 }
452 if (ldm_parse_tocblock(data, tb[nr_tbs]))
453 nr_tbs++;
454 put_dev_sector(sect);
455 }
456 if (!nr_tbs) {
457 ldm_crit("Failed to find a valid TOCBLOCK.");
458 goto err;
459 }
460 /* Range check the TOCBLOCK against a privhead. */
461 if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) ||
462 ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) >
463 ph->config_size)) {
464 ldm_crit("The bitmaps are out of range. Giving up.");
465 goto err;
466 }
467 /* Compare all loaded TOCBLOCKs. */
468 for (i = 1; i < nr_tbs; i++) {
469 if (!ldm_compare_tocblocks(tb[0], tb[i])) {
470 ldm_crit("TOCBLOCKs 0 and %d do not match.", i);
471 goto err;
472 }
473 }
474 ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs);
475 result = true;
476err:
477 kfree(tb[1]);
478 return result;
479}
480
481/**
482 * ldm_validate_vmdb - Read the VMDB and validate it
483 * @state: Partition check state including device holding the LDM Database
484 * @base: Offset, into @state->bdev, of the database
485 * @ldb: Cache of the database structures
486 *
487 * Find the vmdb of the LDM Database stored on @state->bdev and return the parsed
488 * information in @ldb.
489 *
490 * Return: 'true' @ldb contains validated VMDB info
491 * 'false' @ldb contents are undefined
492 */
493static bool ldm_validate_vmdb(struct parsed_partitions *state,
494 unsigned long base, struct ldmdb *ldb)
495{
496 Sector sect;
497 u8 *data;
498 bool result = false;
499 struct vmdb *vm;
500 struct tocblock *toc;
501
502 BUG_ON (!state || !ldb);
503
504 vm = &ldb->vm;
505 toc = &ldb->toc;
506
507 data = read_part_sector(state, base + OFF_VMDB, &sect);
508 if (!data) {
509 ldm_crit ("Disk read failed.");
510 return false;
511 }
512
513 if (!ldm_parse_vmdb (data, vm))
514 goto out; /* Already logged */
515
516 /* Are there uncommitted transactions? */
517 if (get_unaligned_be16(data + 0x10) != 0x01) {
518 ldm_crit ("Database is not in a consistent state. Aborting.");
519 goto out;
520 }
521
522 if (vm->vblk_offset != 512)
523 ldm_info ("VBLKs start at offset 0x%04x.", vm->vblk_offset);
524
525 /*
526 * The last_vblk_seq can be before the end of the vmdb; just make sure
527 * it is not out of bounds.
528 */
529 if ((vm->vblk_size * vm->last_vblk_seq) > (toc->bitmap1_size << 9)) {
530 ldm_crit ("VMDB exceeds allowed size specified by TOCBLOCK. "
531 "Database is corrupt. Aborting.");
532 goto out;
533 }
534
535 result = true;
536out:
537 put_dev_sector (sect);
538 return result;
539}
540
541
542/**
543 * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk
544 * @state: Partition check state including device holding the LDM Database
545 *
546 * This function provides a weak test to decide whether the device is a dynamic
547 * disk or not. It looks for an MS-DOS-style partition table containing at
548 * least one partition of type 0x42 (formerly SFS, now used by Windows for
549 * dynamic disks).
550 *
551 * N.B. The only possible error can come from read_part_sector(), and that is
552 * only likely to happen if the underlying device is strange. If that IS
553 * the case, we should return zero to let someone else try.
554 *
555 * Return: 'true' @state->bdev is a dynamic disk
556 * 'false' @state->bdev is not a dynamic disk, or an error occurred
557 */
558static bool ldm_validate_partition_table(struct parsed_partitions *state)
559{
560 Sector sect;
561 u8 *data;
562 struct partition *p;
563 int i;
564 bool result = false;
565
566 BUG_ON(!state);
567
568 data = read_part_sector(state, 0, &sect);
569 if (!data) {
570 ldm_info ("Disk read failed.");
571 return false;
572 }
573
574 if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC))
575 goto out;
576
577 p = (struct partition*)(data + 0x01BE);
578 for (i = 0; i < 4; i++, p++)
579 if (SYS_IND (p) == LDM_PARTITION) {
580 result = true;
581 break;
582 }
583
584 if (result)
585 ldm_debug ("Found W2K dynamic disk partition type.");
586
587out:
588 put_dev_sector (sect);
589 return result;
590}
591
592/**
593 * ldm_get_disk_objid - Search a linked list of vblk's for a given Disk Id
594 * @ldb: Cache of the database structures
595 *
596 * The LDM Database contains a list of all partitions on all dynamic disks.
597 * The primary PRIVHEAD, at the beginning of the physical disk, tells us
598 * the GUID of this disk. This function searches for the GUID in a linked
599 * list of vblk's.
600 *
601 * Return: Pointer, A matching vblk was found
602 * NULL, No match, or an error
603 */
604static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb)
605{
606 struct list_head *item;
607
608 BUG_ON (!ldb);
609
610 list_for_each (item, &ldb->v_disk) {
611 struct vblk *v = list_entry (item, struct vblk, list);
612 if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE))
613 return v;
614 }
615
616 return NULL;
617}
618
619/**
620 * ldm_create_data_partitions - Create data partitions for this device
621 * @pp: List of the partitions parsed so far
622 * @ldb: Cache of the database structures
623 *
624 * The database contains ALL the partitions for ALL disk groups, so we need to
625 * pick out this specific disk. Using the disk's object id, we can find all
626 * the partitions in the database that belong to this disk.
627 *
628 * Add each partition in our database, to the parsed_partitions structure.
629 *
630 * N.B. This function creates the partitions in the order it finds partition
631 * objects in the linked list.
632 *
633 * Return: 'true' Partition created
634 * 'false' Error, probably a range checking problem
635 */
636static bool ldm_create_data_partitions (struct parsed_partitions *pp,
637 const struct ldmdb *ldb)
638{
639 struct list_head *item;
640 struct vblk *vb;
641 struct vblk *disk;
642 struct vblk_part *part;
643 int part_num = 1;
644
645 BUG_ON (!pp || !ldb);
646
647 disk = ldm_get_disk_objid (ldb);
648 if (!disk) {
649 ldm_crit ("Can't find the ID of this disk in the database.");
650 return false;
651 }
652
653 strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE);
654
655 /* Create the data partitions */
656 list_for_each (item, &ldb->v_part) {
657 vb = list_entry (item, struct vblk, list);
658 part = &vb->vblk.part;
659
660 if (part->disk_id != disk->obj_id)
661 continue;
662
663 put_partition (pp, part_num, ldb->ph.logical_disk_start +
664 part->start, part->size);
665 part_num++;
666 }
667
668 strlcat(pp->pp_buf, "\n", PAGE_SIZE);
669 return true;
670}
671
672
673/**
674 * ldm_relative - Calculate the next relative offset
675 * @buffer: Block of data being worked on
676 * @buflen: Size of the block of data
677 * @base: Size of the previous fixed width fields
678 * @offset: Cumulative size of the previous variable-width fields
679 *
680 * Because many of the VBLK fields are variable-width, it's necessary
681 * to calculate each offset based on the previous one and the length
682 * of the field it pointed to.
683 *
684 * Return: -1 Error, the calculated offset exceeded the size of the buffer
685 * n OK, a range-checked offset into buffer
686 */
687static int ldm_relative(const u8 *buffer, int buflen, int base, int offset)
688{
689
690 base += offset;
691 if (!buffer || offset < 0 || base > buflen) {
692 if (!buffer)
693 ldm_error("!buffer");
694 if (offset < 0)
695 ldm_error("offset (%d) < 0", offset);
696 if (base > buflen)
697 ldm_error("base (%d) > buflen (%d)", base, buflen);
698 return -1;
699 }
700 if (base + buffer[base] >= buflen) {
701 ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base,
702 buffer[base], buflen);
703 return -1;
704 }
705 return buffer[base] + offset + 1;
706}
707
708/**
709 * ldm_get_vnum - Convert a variable-width, big endian number, into cpu order
710 * @block: Pointer to the variable-width number to convert
711 *
712 * Large numbers in the LDM Database are often stored in a packed format. Each
713 * number is prefixed by a one byte width marker. All numbers in the database
714 * are stored in big-endian byte order. This function reads one of these
715 * numbers and returns the result
716 *
717 * N.B. This function DOES NOT perform any range checking, though the most
718 * it will read is eight bytes.
719 *
720 * Return: n A number
721 * 0 Zero, or an error occurred
722 */
723static u64 ldm_get_vnum (const u8 *block)
724{
725 u64 tmp = 0;
726 u8 length;
727
728 BUG_ON (!block);
729
730 length = *block++;
731
732 if (length && length <= 8)
733 while (length--)
734 tmp = (tmp << 8) | *block++;
735 else
736 ldm_error ("Illegal length %d.", length);
737
738 return tmp;
739}
740
741/**
742 * ldm_get_vstr - Read a length-prefixed string into a buffer
743 * @block: Pointer to the length marker
744 * @buffer: Location to copy string to
745 * @buflen: Size of the output buffer
746 *
747 * Many of the strings in the LDM Database are not NULL terminated. Instead
748 * they are prefixed by a one byte length marker. This function copies one of
749 * these strings into a buffer.
750 *
751 * N.B. This function DOES NOT perform any range checking on the input.
752 * If the buffer is too small, the output will be truncated.
753 *
754 * Return: 0, Error and @buffer contents are undefined
755 * n, String length in characters (excluding NULL)
756 * buflen-1, String was truncated.
757 */
758static int ldm_get_vstr (const u8 *block, u8 *buffer, int buflen)
759{
760 int length;
761
762 BUG_ON (!block || !buffer);
763
764 length = block[0];
765 if (length >= buflen) {
766 ldm_error ("Truncating string %d -> %d.", length, buflen);
767 length = buflen - 1;
768 }
769 memcpy (buffer, block + 1, length);
770 buffer[length] = 0;
771 return length;
772}
773
774
775/**
776 * ldm_parse_cmp3 - Read a raw VBLK Component object into a vblk structure
777 * @buffer: Block of data being worked on
778 * @buflen: Size of the block of data
779 * @vb: In-memory vblk in which to return information
780 *
781 * Read a raw VBLK Component object (version 3) into a vblk structure.
782 *
783 * Return: 'true' @vb contains a Component VBLK
784 * 'false' @vb contents are not defined
785 */
786static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
787{
788 int r_objid, r_name, r_vstate, r_child, r_parent, r_stripe, r_cols, len;
789 struct vblk_comp *comp;
790
791 BUG_ON (!buffer || !vb);
792
793 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
794 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
795 r_vstate = ldm_relative (buffer, buflen, 0x18, r_name);
796 r_child = ldm_relative (buffer, buflen, 0x1D, r_vstate);
797 r_parent = ldm_relative (buffer, buflen, 0x2D, r_child);
798
799 if (buffer[0x12] & VBLK_FLAG_COMP_STRIPE) {
800 r_stripe = ldm_relative (buffer, buflen, 0x2E, r_parent);
801 r_cols = ldm_relative (buffer, buflen, 0x2E, r_stripe);
802 len = r_cols;
803 } else {
804 r_stripe = 0;
805 r_cols = 0;
806 len = r_parent;
807 }
808 if (len < 0)
809 return false;
810
811 len += VBLK_SIZE_CMP3;
812 if (len != get_unaligned_be32(buffer + 0x14))
813 return false;
814
815 comp = &vb->vblk.comp;
816 ldm_get_vstr (buffer + 0x18 + r_name, comp->state,
817 sizeof (comp->state));
818 comp->type = buffer[0x18 + r_vstate];
819 comp->children = ldm_get_vnum (buffer + 0x1D + r_vstate);
820 comp->parent_id = ldm_get_vnum (buffer + 0x2D + r_child);
821 comp->chunksize = r_stripe ? ldm_get_vnum (buffer+r_parent+0x2E) : 0;
822
823 return true;
824}
825
826/**
827 * ldm_parse_dgr3 - Read a raw VBLK Disk Group object into a vblk structure
828 * @buffer: Block of data being worked on
829 * @buflen: Size of the block of data
830 * @vb: In-memory vblk in which to return information
831 *
832 * Read a raw VBLK Disk Group object (version 3) into a vblk structure.
833 *
834 * Return: 'true' @vb contains a Disk Group VBLK
835 * 'false' @vb contents are not defined
836 */
837static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
838{
839 int r_objid, r_name, r_diskid, r_id1, r_id2, len;
840 struct vblk_dgrp *dgrp;
841
842 BUG_ON (!buffer || !vb);
843
844 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
845 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
846 r_diskid = ldm_relative (buffer, buflen, 0x18, r_name);
847
848 if (buffer[0x12] & VBLK_FLAG_DGR3_IDS) {
849 r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid);
850 r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1);
851 len = r_id2;
852 } else {
853 r_id1 = 0;
854 r_id2 = 0;
855 len = r_diskid;
856 }
857 if (len < 0)
858 return false;
859
860 len += VBLK_SIZE_DGR3;
861 if (len != get_unaligned_be32(buffer + 0x14))
862 return false;
863
864 dgrp = &vb->vblk.dgrp;
865 ldm_get_vstr (buffer + 0x18 + r_name, dgrp->disk_id,
866 sizeof (dgrp->disk_id));
867 return true;
868}
869
870/**
871 * ldm_parse_dgr4 - Read a raw VBLK Disk Group object into a vblk structure
872 * @buffer: Block of data being worked on
873 * @buflen: Size of the block of data
874 * @vb: In-memory vblk in which to return information
875 *
876 * Read a raw VBLK Disk Group object (version 4) into a vblk structure.
877 *
878 * Return: 'true' @vb contains a Disk Group VBLK
879 * 'false' @vb contents are not defined
880 */
881static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
882{
883 char buf[64];
884 int r_objid, r_name, r_id1, r_id2, len;
885 struct vblk_dgrp *dgrp;
886
887 BUG_ON (!buffer || !vb);
888
889 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
890 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
891
892 if (buffer[0x12] & VBLK_FLAG_DGR4_IDS) {
893 r_id1 = ldm_relative (buffer, buflen, 0x44, r_name);
894 r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1);
895 len = r_id2;
896 } else {
897 r_id1 = 0;
898 r_id2 = 0;
899 len = r_name;
900 }
901 if (len < 0)
902 return false;
903
904 len += VBLK_SIZE_DGR4;
905 if (len != get_unaligned_be32(buffer + 0x14))
906 return false;
907
908 dgrp = &vb->vblk.dgrp;
909
910 ldm_get_vstr (buffer + 0x18 + r_objid, buf, sizeof (buf));
911 return true;
912}
913
914/**
915 * ldm_parse_dsk3 - Read a raw VBLK Disk object into a vblk structure
916 * @buffer: Block of data being worked on
917 * @buflen: Size of the block of data
918 * @vb: In-memory vblk in which to return information
919 *
920 * Read a raw VBLK Disk object (version 3) into a vblk structure.
921 *
922 * Return: 'true' @vb contains a Disk VBLK
923 * 'false' @vb contents are not defined
924 */
925static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
926{
927 int r_objid, r_name, r_diskid, r_altname, len;
928 struct vblk_disk *disk;
929
930 BUG_ON (!buffer || !vb);
931
932 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
933 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
934 r_diskid = ldm_relative (buffer, buflen, 0x18, r_name);
935 r_altname = ldm_relative (buffer, buflen, 0x18, r_diskid);
936 len = r_altname;
937 if (len < 0)
938 return false;
939
940 len += VBLK_SIZE_DSK3;
941 if (len != get_unaligned_be32(buffer + 0x14))
942 return false;
943
944 disk = &vb->vblk.disk;
945 ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name,
946 sizeof (disk->alt_name));
947 if (!ldm_parse_guid (buffer + 0x19 + r_name, disk->disk_id))
948 return false;
949
950 return true;
951}
952
953/**
954 * ldm_parse_dsk4 - Read a raw VBLK Disk object into a vblk structure
955 * @buffer: Block of data being worked on
956 * @buflen: Size of the block of data
957 * @vb: In-memory vblk in which to return information
958 *
959 * Read a raw VBLK Disk object (version 4) into a vblk structure.
960 *
961 * Return: 'true' @vb contains a Disk VBLK
962 * 'false' @vb contents are not defined
963 */
964static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
965{
966 int r_objid, r_name, len;
967 struct vblk_disk *disk;
968
969 BUG_ON (!buffer || !vb);
970
971 r_objid = ldm_relative (buffer, buflen, 0x18, 0);
972 r_name = ldm_relative (buffer, buflen, 0x18, r_objid);
973 len = r_name;
974 if (len < 0)
975 return false;
976
977 len += VBLK_SIZE_DSK4;
978 if (len != get_unaligned_be32(buffer + 0x14))
979 return false;
980
981 disk = &vb->vblk.disk;
982 memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE);
983 return true;
984}
985
986/**
987 * ldm_parse_prt3 - Read a raw VBLK Partition object into a vblk structure
988 * @buffer: Block of data being worked on
989 * @buflen: Size of the block of data
990 * @vb: In-memory vblk in which to return information
991 *
992 * Read a raw VBLK Partition object (version 3) into a vblk structure.
993 *
994 * Return: 'true' @vb contains a Partition VBLK
995 * 'false' @vb contents are not defined
996 */
997static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
998{
999 int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len;
1000 struct vblk_part *part;
1001
1002 BUG_ON(!buffer || !vb);
1003 r_objid = ldm_relative(buffer, buflen, 0x18, 0);
1004 if (r_objid < 0) {
1005 ldm_error("r_objid %d < 0", r_objid);
1006 return false;
1007 }
1008 r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
1009 if (r_name < 0) {
1010 ldm_error("r_name %d < 0", r_name);
1011 return false;
1012 }
1013 r_size = ldm_relative(buffer, buflen, 0x34, r_name);
1014 if (r_size < 0) {
1015 ldm_error("r_size %d < 0", r_size);
1016 return false;
1017 }
1018 r_parent = ldm_relative(buffer, buflen, 0x34, r_size);
1019 if (r_parent < 0) {
1020 ldm_error("r_parent %d < 0", r_parent);
1021 return false;
1022 }
1023 r_diskid = ldm_relative(buffer, buflen, 0x34, r_parent);
1024 if (r_diskid < 0) {
1025 ldm_error("r_diskid %d < 0", r_diskid);
1026 return false;
1027 }
1028 if (buffer[0x12] & VBLK_FLAG_PART_INDEX) {
1029 r_index = ldm_relative(buffer, buflen, 0x34, r_diskid);
1030 if (r_index < 0) {
1031 ldm_error("r_index %d < 0", r_index);
1032 return false;
1033 }
1034 len = r_index;
1035 } else {
1036 r_index = 0;
1037 len = r_diskid;
1038 }
1039 if (len < 0) {
1040 ldm_error("len %d < 0", len);
1041 return false;
1042 }
1043 len += VBLK_SIZE_PRT3;
1044 if (len > get_unaligned_be32(buffer + 0x14)) {
1045 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1046 get_unaligned_be32(buffer + 0x14));
1047 return false;
1048 }
1049 part = &vb->vblk.part;
1050 part->start = get_unaligned_be64(buffer + 0x24 + r_name);
1051 part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
1052 part->size = ldm_get_vnum(buffer + 0x34 + r_name);
1053 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
1054 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
1055 if (vb->flags & VBLK_FLAG_PART_INDEX)
1056 part->partnum = buffer[0x35 + r_diskid];
1057 else
1058 part->partnum = 0;
1059 return true;
1060}
1061
1062/**
1063 * ldm_parse_vol5 - Read a raw VBLK Volume object into a vblk structure
1064 * @buffer: Block of data being worked on
1065 * @buflen: Size of the block of data
1066 * @vb: In-memory vblk in which to return information
1067 *
1068 * Read a raw VBLK Volume object (version 5) into a vblk structure.
1069 *
1070 * Return: 'true' @vb contains a Volume VBLK
1071 * 'false' @vb contents are not defined
1072 */
1073static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
1074{
1075 int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size;
1076 int r_id1, r_id2, r_size2, r_drive, len;
1077 struct vblk_volu *volu;
1078
1079 BUG_ON(!buffer || !vb);
1080 r_objid = ldm_relative(buffer, buflen, 0x18, 0);
1081 if (r_objid < 0) {
1082 ldm_error("r_objid %d < 0", r_objid);
1083 return false;
1084 }
1085 r_name = ldm_relative(buffer, buflen, 0x18, r_objid);
1086 if (r_name < 0) {
1087 ldm_error("r_name %d < 0", r_name);
1088 return false;
1089 }
1090 r_vtype = ldm_relative(buffer, buflen, 0x18, r_name);
1091 if (r_vtype < 0) {
1092 ldm_error("r_vtype %d < 0", r_vtype);
1093 return false;
1094 }
1095 r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype);
1096 if (r_disable_drive_letter < 0) {
1097 ldm_error("r_disable_drive_letter %d < 0",
1098 r_disable_drive_letter);
1099 return false;
1100 }
1101 r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter);
1102 if (r_child < 0) {
1103 ldm_error("r_child %d < 0", r_child);
1104 return false;
1105 }
1106 r_size = ldm_relative(buffer, buflen, 0x3D, r_child);
1107 if (r_size < 0) {
1108 ldm_error("r_size %d < 0", r_size);
1109 return false;
1110 }
1111 if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) {
1112 r_id1 = ldm_relative(buffer, buflen, 0x52, r_size);
1113 if (r_id1 < 0) {
1114 ldm_error("r_id1 %d < 0", r_id1);
1115 return false;
1116 }
1117 } else
1118 r_id1 = r_size;
1119 if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) {
1120 r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1);
1121 if (r_id2 < 0) {
1122 ldm_error("r_id2 %d < 0", r_id2);
1123 return false;
1124 }
1125 } else
1126 r_id2 = r_id1;
1127 if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) {
1128 r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2);
1129 if (r_size2 < 0) {
1130 ldm_error("r_size2 %d < 0", r_size2);
1131 return false;
1132 }
1133 } else
1134 r_size2 = r_id2;
1135 if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
1136 r_drive = ldm_relative(buffer, buflen, 0x52, r_size2);
1137 if (r_drive < 0) {
1138 ldm_error("r_drive %d < 0", r_drive);
1139 return false;
1140 }
1141 } else
1142 r_drive = r_size2;
1143 len = r_drive;
1144 if (len < 0) {
1145 ldm_error("len %d < 0", len);
1146 return false;
1147 }
1148 len += VBLK_SIZE_VOL5;
1149 if (len > get_unaligned_be32(buffer + 0x14)) {
1150 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1151 get_unaligned_be32(buffer + 0x14));
1152 return false;
1153 }
1154 volu = &vb->vblk.volu;
1155 ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type,
1156 sizeof(volu->volume_type));
1157 memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter,
1158 sizeof(volu->volume_state));
1159 volu->size = ldm_get_vnum(buffer + 0x3D + r_child);
1160 volu->partition_type = buffer[0x41 + r_size];
1161 memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid));
1162 if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) {
1163 ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint,
1164 sizeof(volu->drive_hint));
1165 }
1166 return true;
1167}
1168
1169/**
1170 * ldm_parse_vblk - Read a raw VBLK object into a vblk structure
1171 * @buf: Block of data being worked on
1172 * @len: Size of the block of data
1173 * @vb: In-memory vblk in which to return information
1174 *
1175 * Read a raw VBLK object into a vblk structure. This function just reads the
1176 * information common to all VBLK types, then delegates the rest of the work to
1177 * helper functions: ldm_parse_*.
1178 *
1179 * Return: 'true' @vb contains a VBLK
1180 * 'false' @vb contents are not defined
1181 */
1182static bool ldm_parse_vblk (const u8 *buf, int len, struct vblk *vb)
1183{
1184 bool result = false;
1185 int r_objid;
1186
1187 BUG_ON (!buf || !vb);
1188
1189 r_objid = ldm_relative (buf, len, 0x18, 0);
1190 if (r_objid < 0) {
1191 ldm_error ("VBLK header is corrupt.");
1192 return false;
1193 }
1194
1195 vb->flags = buf[0x12];
1196 vb->type = buf[0x13];
1197 vb->obj_id = ldm_get_vnum (buf + 0x18);
1198 ldm_get_vstr (buf+0x18+r_objid, vb->name, sizeof (vb->name));
1199
1200 switch (vb->type) {
1201 case VBLK_CMP3: result = ldm_parse_cmp3 (buf, len, vb); break;
1202 case VBLK_DSK3: result = ldm_parse_dsk3 (buf, len, vb); break;
1203 case VBLK_DSK4: result = ldm_parse_dsk4 (buf, len, vb); break;
1204 case VBLK_DGR3: result = ldm_parse_dgr3 (buf, len, vb); break;
1205 case VBLK_DGR4: result = ldm_parse_dgr4 (buf, len, vb); break;
1206 case VBLK_PRT3: result = ldm_parse_prt3 (buf, len, vb); break;
1207 case VBLK_VOL5: result = ldm_parse_vol5 (buf, len, vb); break;
1208 }
1209
1210 if (result)
1211 ldm_debug ("Parsed VBLK 0x%llx (type: 0x%02x) ok.",
1212 (unsigned long long) vb->obj_id, vb->type);
1213 else
1214 ldm_error ("Failed to parse VBLK 0x%llx (type: 0x%02x).",
1215 (unsigned long long) vb->obj_id, vb->type);
1216
1217 return result;
1218}
1219
1220
1221/**
1222 * ldm_ldmdb_add - Adds a raw VBLK entry to the ldmdb database
1223 * @data: Raw VBLK to add to the database
1224 * @len: Size of the raw VBLK
1225 * @ldb: Cache of the database structures
1226 *
1227 * The VBLKs are sorted into categories. Partitions are also sorted by offset.
1228 *
1229 * N.B. This function does not check the validity of the VBLKs.
1230 *
1231 * Return: 'true' The VBLK was added
1232 * 'false' An error occurred
1233 */
1234static bool ldm_ldmdb_add (u8 *data, int len, struct ldmdb *ldb)
1235{
1236 struct vblk *vb;
1237 struct list_head *item;
1238
1239 BUG_ON (!data || !ldb);
1240
1241 vb = kmalloc (sizeof (*vb), GFP_KERNEL);
1242 if (!vb) {
1243 ldm_crit ("Out of memory.");
1244 return false;
1245 }
1246
1247 if (!ldm_parse_vblk (data, len, vb)) {
1248 kfree(vb);
1249 return false; /* Already logged */
1250 }
1251
1252 /* Put vblk into the correct list. */
1253 switch (vb->type) {
1254 case VBLK_DGR3:
1255 case VBLK_DGR4:
1256 list_add (&vb->list, &ldb->v_dgrp);
1257 break;
1258 case VBLK_DSK3:
1259 case VBLK_DSK4:
1260 list_add (&vb->list, &ldb->v_disk);
1261 break;
1262 case VBLK_VOL5:
1263 list_add (&vb->list, &ldb->v_volu);
1264 break;
1265 case VBLK_CMP3:
1266 list_add (&vb->list, &ldb->v_comp);
1267 break;
1268 case VBLK_PRT3:
1269 /* Sort by the partition's start sector. */
1270 list_for_each (item, &ldb->v_part) {
1271 struct vblk *v = list_entry (item, struct vblk, list);
1272 if ((v->vblk.part.disk_id == vb->vblk.part.disk_id) &&
1273 (v->vblk.part.start > vb->vblk.part.start)) {
1274 list_add_tail (&vb->list, &v->list);
1275 return true;
1276 }
1277 }
1278 list_add_tail (&vb->list, &ldb->v_part);
1279 break;
1280 }
1281 return true;
1282}
1283
1284/**
1285 * ldm_frag_add - Add a VBLK fragment to a list
1286 * @data: Raw fragment to be added to the list
1287 * @size: Size of the raw fragment
1288 * @frags: Linked list of VBLK fragments
1289 *
1290 * Fragmented VBLKs may not be consecutive in the database, so they are placed
1291 * in a list so they can be pieced together later.
1292 *
1293 * Return: 'true' Success, the VBLK was added to the list
1294 * 'false' Error, a problem occurred
1295 */
1296static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1297{
1298 struct frag *f;
1299 struct list_head *item;
1300 int rec, num, group;
1301
1302 BUG_ON (!data || !frags);
1303
1304 if (size < 2 * VBLK_SIZE_HEAD) {
1305		ldm_error("Value of size is too small.");
1306 return false;
1307 }
1308
1309 group = get_unaligned_be32(data + 0x08);
1310 rec = get_unaligned_be16(data + 0x0C);
1311 num = get_unaligned_be16(data + 0x0E);
1312 if ((num < 1) || (num > 4)) {
1313 ldm_error ("A VBLK claims to have %d parts.", num);
1314 return false;
1315 }
1316 if (rec >= num) {
1317 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num);
1318 return false;
1319 }
1320
1321 list_for_each (item, frags) {
1322 f = list_entry (item, struct frag, list);
1323 if (f->group == group)
1324 goto found;
1325 }
1326
1327 f = kmalloc (sizeof (*f) + size*num, GFP_KERNEL);
1328 if (!f) {
1329 ldm_crit ("Out of memory.");
1330 return false;
1331 }
1332
1333 f->group = group;
1334 f->num = num;
1335 f->rec = rec;
1336 f->map = 0xFF << num;
1337
1338 list_add_tail (&f->list, frags);
1339found:
1340 if (rec >= f->num) {
1341 ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num);
1342 return false;
1343 }
1344
1345 if (f->map & (1 << rec)) {
1346 ldm_error ("Duplicate VBLK, part %d.", rec);
1347 f->map &= 0x7F; /* Mark the group as broken */
1348 return false;
1349 }
1350
1351 f->map |= (1 << rec);
1352
1353 data += VBLK_SIZE_HEAD;
1354 size -= VBLK_SIZE_HEAD;
1355
1356 memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size);
1357
1358 return true;
1359}
1360
1361/**
1362 * ldm_frag_free - Free a linked list of VBLK fragments
1363 * @list: Linked list of fragments
1364 *
1365 * Free a linked list of VBLK fragments
1366 *
1367 * Return: none
1368 */
1369static void ldm_frag_free (struct list_head *list)
1370{
1371 struct list_head *item, *tmp;
1372
1373 BUG_ON (!list);
1374
1375 list_for_each_safe (item, tmp, list)
1376 kfree (list_entry (item, struct frag, list));
1377}
1378
1379/**
1380 * ldm_frag_commit - Validate fragmented VBLKs and add them to the database
1381 * @frags: Linked list of VBLK fragments
1382 * @ldb: Cache of the database structures
1383 *
1384 * Now that all the fragmented VBLKs have been collected, they must be added to
1385 * the database for later use.
1386 *
1387 * Return: 'true' All the fragments were added successfully
1388 * 'false' One or more of the fragments were invalid
1389 */
1390static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb)
1391{
1392 struct frag *f;
1393 struct list_head *item;
1394
1395 BUG_ON (!frags || !ldb);
1396
1397 list_for_each (item, frags) {
1398 f = list_entry (item, struct frag, list);
1399
1400 if (f->map != 0xFF) {
1401 ldm_error ("VBLK group %d is incomplete (0x%02x).",
1402 f->group, f->map);
1403 return false;
1404 }
1405
1406 if (!ldm_ldmdb_add (f->data, f->num*ldb->vm.vblk_size, ldb))
1407 return false; /* Already logged */
1408 }
1409 return true;
1410}
1411
1412/**
1413 * ldm_get_vblks - Read the on-disk database of VBLKs into memory
1414 * @state: Partition check state including device holding the LDM Database
1415 * @base: Offset, into @state->bdev, of the database
1416 * @ldb: Cache of the database structures
1417 *
1418 * To use the information from the VBLKs, they need to be read from the disk,
1419 * unpacked and validated. We cache them in @ldb according to their type.
1420 *
1421 * Return: 'true' All the VBLKs were read successfully
1422 * 'false' An error occurred
1423 */
1424static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base,
1425 struct ldmdb *ldb)
1426{
1427 int size, perbuf, skip, finish, s, v, recs;
1428 u8 *data = NULL;
1429 Sector sect;
1430 bool result = false;
1431 LIST_HEAD (frags);
1432
1433 BUG_ON(!state || !ldb);
1434
1435 size = ldb->vm.vblk_size;
1436 perbuf = 512 / size;
1437 skip = ldb->vm.vblk_offset >> 9; /* Bytes to sectors */
1438 finish = (size * ldb->vm.last_vblk_seq) >> 9;
1439
1440 for (s = skip; s < finish; s++) { /* For each sector */
1441 data = read_part_sector(state, base + OFF_VMDB + s, &sect);
1442 if (!data) {
1443 ldm_crit ("Disk read failed.");
1444 goto out;
1445 }
1446
1447 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
1448 if (MAGIC_VBLK != get_unaligned_be32(data)) {
1449 ldm_error ("Expected to find a VBLK.");
1450 goto out;
1451 }
1452
1453 recs = get_unaligned_be16(data + 0x0E); /* Number of records */
1454 if (recs == 1) {
1455 if (!ldm_ldmdb_add (data, size, ldb))
1456 goto out; /* Already logged */
1457 } else if (recs > 1) {
1458 if (!ldm_frag_add (data, size, &frags))
1459 goto out; /* Already logged */
1460 }
1461 /* else Record is not in use, ignore it. */
1462 }
1463 put_dev_sector (sect);
1464 data = NULL;
1465 }
1466
1467 result = ldm_frag_commit (&frags, ldb); /* Failures, already logged */
1468out:
1469 if (data)
1470 put_dev_sector (sect);
1471 ldm_frag_free (&frags);
1472
1473 return result;
1474}
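
The loop bounds above come from simple sector arithmetic: VBLKs are packed perbuf-to-a-sector, the vmdb header says how many bytes into the VMDB area the first VBLK starts, and last_vblk_seq bounds the scan. A small standalone illustration of that arithmetic with made-up vmdb values (not taken from a real disk):

#include <stdio.h>

int main(void)
{
	/* Illustrative values only; a real vmdb is read from the disk. */
	unsigned vblk_size     = 128;	/* bytes per VBLK        */
	unsigned vblk_offset   = 512;	/* first VBLK, in bytes  */
	unsigned last_vblk_seq = 64;	/* highest VBLK sequence */

	unsigned perbuf = 512 / vblk_size;			/* VBLKs per 512-byte sector */
	unsigned skip   = vblk_offset >> 9;			/* first sector to read      */
	unsigned finish = (vblk_size * last_vblk_seq) >> 9;	/* one past the last sector  */

	printf("%u VBLKs per sector, scan sectors %u..%u of the VMDB area\n",
	       perbuf, skip, finish - 1);
	return 0;
}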
1475
1476/**
 1477 * ldm_free_vblks - Free a linked list of vblks
1478 * @lh: Head of a linked list of struct vblk
1479 *
 1480 * Free a list of vblks and free the memory used to maintain the list.
1481 *
1482 * Return: none
1483 */
1484static void ldm_free_vblks (struct list_head *lh)
1485{
1486 struct list_head *item, *tmp;
1487
1488 BUG_ON (!lh);
1489
1490 list_for_each_safe (item, tmp, lh)
1491 kfree (list_entry (item, struct vblk, list));
1492}
1493
1494
1495/**
1496 * ldm_partition - Find out whether a device is a dynamic disk and handle it
1497 * @state: Partition check state including device holding the LDM Database
1498 *
 1499 * This determines whether the device @state->bdev is a dynamic disk and, if so,
 1500 * creates the necessary partitions in the parsed_partitions structure @state.
1501 *
1502 * We create a dummy device 1, which contains the LDM database, and then create
1503 * each partition described by the LDM database in sequence as devices 2+. For
 1504 * example, if the device is hda, hda1 would hold the LDM database, and hda2, hda3,
 1505 * and so on would be the actual data-containing partitions.
1506 *
1507 * Return: 1 Success, @state->bdev is a dynamic disk and we handled it
1508 * 0 Success, @state->bdev is not a dynamic disk
1509 * -1 An error occurred before enough information had been read
1510 * Or @state->bdev is a dynamic disk, but it may be corrupted
1511 */
1512int ldm_partition(struct parsed_partitions *state)
1513{
1514 struct ldmdb *ldb;
1515 unsigned long base;
1516 int result = -1;
1517
1518 BUG_ON(!state);
1519
1520 /* Look for signs of a Dynamic Disk */
1521 if (!ldm_validate_partition_table(state))
1522 return 0;
1523
1524 ldb = kmalloc (sizeof (*ldb), GFP_KERNEL);
1525 if (!ldb) {
1526 ldm_crit ("Out of memory.");
1527 goto out;
1528 }
1529
1530 /* Parse and check privheads. */
1531 if (!ldm_validate_privheads(state, &ldb->ph))
1532 goto out; /* Already logged */
1533
1534 /* All further references are relative to base (database start). */
1535 base = ldb->ph.config_start;
1536
1537 /* Parse and check tocs and vmdb. */
1538 if (!ldm_validate_tocblocks(state, base, ldb) ||
1539 !ldm_validate_vmdb(state, base, ldb))
1540 goto out; /* Already logged */
1541
1542 /* Initialize vblk lists in ldmdb struct */
1543 INIT_LIST_HEAD (&ldb->v_dgrp);
1544 INIT_LIST_HEAD (&ldb->v_disk);
1545 INIT_LIST_HEAD (&ldb->v_volu);
1546 INIT_LIST_HEAD (&ldb->v_comp);
1547 INIT_LIST_HEAD (&ldb->v_part);
1548
1549 if (!ldm_get_vblks(state, base, ldb)) {
1550 ldm_crit ("Failed to read the VBLKs from the database.");
1551 goto cleanup;
1552 }
1553
1554 /* Finally, create the data partition devices. */
1555 if (ldm_create_data_partitions(state, ldb)) {
1556 ldm_debug ("Parsed LDM database successfully.");
1557 result = 1;
1558 }
1559 /* else Already logged */
1560
1561cleanup:
1562 ldm_free_vblks (&ldb->v_dgrp);
1563 ldm_free_vblks (&ldb->v_disk);
1564 ldm_free_vblks (&ldb->v_volu);
1565 ldm_free_vblks (&ldb->v_comp);
1566 ldm_free_vblks (&ldb->v_part);
1567out:
1568 kfree (ldb);
1569 return result;
1570}
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
deleted file mode 100644
index 374242c0971a..000000000000
--- a/fs/partitions/ldm.h
+++ /dev/null
@@ -1,215 +0,0 @@
1/**
2 * ldm - Part of the Linux-NTFS project.
3 *
4 * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org>
5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com>
7 *
8 * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 2 of the License, or (at your option)
13 * any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program (in the main directory of the Linux-NTFS source
22 * in the file COPYING); if not, write to the Free Software Foundation,
23 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25
26#ifndef _FS_PT_LDM_H_
27#define _FS_PT_LDM_H_
28
29#include <linux/types.h>
30#include <linux/list.h>
31#include <linux/genhd.h>
32#include <linux/fs.h>
33#include <asm/unaligned.h>
34#include <asm/byteorder.h>
35
36struct parsed_partitions;
37
38/* Magic numbers in CPU format. */
39#define MAGIC_VMDB 0x564D4442 /* VMDB */
40#define MAGIC_VBLK 0x56424C4B /* VBLK */
41#define MAGIC_PRIVHEAD 0x5052495648454144ULL /* PRIVHEAD */
42#define MAGIC_TOCBLOCK 0x544F43424C4F434BULL /* TOCBLOCK */
43
44/* The defined vblk types. */
45#define VBLK_VOL5 0x51 /* Volume, version 5 */
46#define VBLK_CMP3 0x32 /* Component, version 3 */
47#define VBLK_PRT3 0x33 /* Partition, version 3 */
48#define VBLK_DSK3 0x34 /* Disk, version 3 */
49#define VBLK_DSK4 0x44 /* Disk, version 4 */
50#define VBLK_DGR3 0x35 /* Disk Group, version 3 */
51#define VBLK_DGR4 0x45 /* Disk Group, version 4 */
52
53/* vblk flags indicating extra information will be present */
54#define VBLK_FLAG_COMP_STRIPE 0x10
55#define VBLK_FLAG_PART_INDEX 0x08
56#define VBLK_FLAG_DGR3_IDS 0x08
57#define VBLK_FLAG_DGR4_IDS 0x08
58#define VBLK_FLAG_VOLU_ID1 0x08
59#define VBLK_FLAG_VOLU_ID2 0x20
60#define VBLK_FLAG_VOLU_SIZE 0x80
61#define VBLK_FLAG_VOLU_DRIVE 0x02
62
63/* size of a vblk's static parts */
64#define VBLK_SIZE_HEAD 16
65#define VBLK_SIZE_CMP3 22 /* Name and version */
66#define VBLK_SIZE_DGR3 12
67#define VBLK_SIZE_DGR4 44
68#define VBLK_SIZE_DSK3 12
69#define VBLK_SIZE_DSK4 45
70#define VBLK_SIZE_PRT3 28
71#define VBLK_SIZE_VOL5 58
72
73/* component types */
74#define COMP_STRIPE 0x01 /* Stripe-set */
75#define COMP_BASIC 0x02 /* Basic disk */
76#define COMP_RAID 0x03 /* Raid-set */
77
78/* Other constants. */
79#define LDM_DB_SIZE 2048 /* Size in sectors (= 1MiB). */
80
81#define OFF_PRIV1 6 /* Offset of the first privhead
82 relative to the start of the
83 device in sectors */
84
85/* Offsets to structures within the LDM Database in sectors. */
86#define OFF_PRIV2 1856 /* Backup private headers. */
87#define OFF_PRIV3 2047
88
89#define OFF_TOCB1 1 /* Tables of contents. */
90#define OFF_TOCB2 2
91#define OFF_TOCB3 2045
92#define OFF_TOCB4 2046
93
94#define OFF_VMDB 17 /* List of partitions. */
95
96#define LDM_PARTITION 0x42 /* Formerly SFS (Landis). */
97
98#define TOC_BITMAP1 "config" /* Names of the two defined */
99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
100
101/* Borrowed from msdos.c */
102#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
103
104struct frag { /* VBLK Fragment handling */
105 struct list_head list;
106 u32 group;
107 u8 num; /* Total number of records */
108 u8 rec; /* This is record number n */
109 u8 map; /* Which portions are in use */
110 u8 data[0];
111};
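
struct frag ends with a zero-length data[] array, the old kernel idiom for a variable-sized trailing buffer: one allocation holds the header plus room for the reassembled records. A hedged userspace sketch of the same allocation pattern (using the C99 flexible array member; the size formula here is illustrative, not the exact one used by ldm_frag_add, which is not part of this excerpt):

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

struct blob {
	uint32_t group;
	uint8_t  num;
	uint8_t  data[];	/* payload follows the header in the same allocation */
};

static struct blob *blob_alloc(uint32_t group, uint8_t num, size_t payload)
{
	struct blob *b = malloc(sizeof(*b) + payload);

	if (!b)
		return NULL;
	b->group = group;
	b->num = num;
	memset(b->data, 0, payload);
	return b;
}

int main(void)
{
	struct blob *b = blob_alloc(7, 4, 4 * 112);	/* e.g. 4 records of 112 bytes */

	free(b);
	return 0;
}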
112
113/* In memory LDM database structures. */
114
115#define GUID_SIZE 16
116
117struct privhead { /* Offsets and sizes are in sectors. */
118 u16 ver_major;
119 u16 ver_minor;
120 u64 logical_disk_start;
121 u64 logical_disk_size;
122 u64 config_start;
123 u64 config_size;
124 u8 disk_id[GUID_SIZE];
125};
126
127struct tocblock { /* We have exactly two bitmaps. */
128 u8 bitmap1_name[16];
129 u64 bitmap1_start;
130 u64 bitmap1_size;
131 u8 bitmap2_name[16];
132 u64 bitmap2_start;
133 u64 bitmap2_size;
134};
135
136struct vmdb { /* VMDB: The database header */
137 u16 ver_major;
138 u16 ver_minor;
139 u32 vblk_size;
140 u32 vblk_offset;
141 u32 last_vblk_seq;
142};
143
144struct vblk_comp { /* VBLK Component */
145 u8 state[16];
146 u64 parent_id;
147 u8 type;
148 u8 children;
149 u16 chunksize;
150};
151
152struct vblk_dgrp { /* VBLK Disk Group */
153 u8 disk_id[64];
154};
155
156struct vblk_disk { /* VBLK Disk */
157 u8 disk_id[GUID_SIZE];
158 u8 alt_name[128];
159};
160
161struct vblk_part { /* VBLK Partition */
162 u64 start;
163 u64 size; /* start, size and vol_off in sectors */
164 u64 volume_offset;
165 u64 parent_id;
166 u64 disk_id;
167 u8 partnum;
168};
169
170struct vblk_volu { /* VBLK Volume */
171 u8 volume_type[16];
172 u8 volume_state[16];
173 u8 guid[16];
174 u8 drive_hint[4];
175 u64 size;
176 u8 partition_type;
177};
178
179struct vblk_head { /* VBLK standard header */
180 u32 group;
181 u16 rec;
182 u16 nrec;
183};
184
185struct vblk { /* Generalised VBLK */
186 u8 name[64];
187 u64 obj_id;
188 u32 sequence;
189 u8 flags;
190 u8 type;
191 union {
192 struct vblk_comp comp;
193 struct vblk_dgrp dgrp;
194 struct vblk_disk disk;
195 struct vblk_part part;
196 struct vblk_volu volu;
197 } vblk;
198 struct list_head list;
199};
200
201struct ldmdb { /* Cache of the database */
202 struct privhead ph;
203 struct tocblock toc;
204 struct vmdb vm;
205 struct list_head v_dgrp;
206 struct list_head v_disk;
207 struct list_head v_volu;
208 struct list_head v_comp;
209 struct list_head v_part;
210};
211
212int ldm_partition(struct parsed_partitions *state);
213
214#endif /* _FS_PT_LDM_H_ */
215
diff --git a/fs/partitions/mac.c b/fs/partitions/mac.c
deleted file mode 100644
index 11f688bd76c5..000000000000
--- a/fs/partitions/mac.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * fs/partitions/mac.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 * Copyright (C) 1991-1998 Linus Torvalds
6 * Re-organised Feb 1998 Russell King
7 */
8
9#include <linux/ctype.h>
10#include "check.h"
11#include "mac.h"
12
13#ifdef CONFIG_PPC_PMAC
14#include <asm/machdep.h>
15extern void note_bootable_part(dev_t dev, int part, int goodness);
16#endif
17
18/*
19 * Code to understand MacOS partition tables.
20 */
21
22static inline void mac_fix_string(char *stg, int len)
23{
24 int i;
25
26 for (i = len - 1; i >= 0 && stg[i] == ' '; i--)
27 stg[i] = 0;
28}
29
30int mac_partition(struct parsed_partitions *state)
31{
32 Sector sect;
33 unsigned char *data;
34 int slot, blocks_in_map;
35 unsigned secsize;
36#ifdef CONFIG_PPC_PMAC
37 int found_root = 0;
38 int found_root_goodness = 0;
39#endif
40 struct mac_partition *part;
41 struct mac_driver_desc *md;
42
43 /* Get 0th block and look at the first partition map entry. */
44 md = read_part_sector(state, 0, &sect);
45 if (!md)
46 return -1;
47 if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) {
48 put_dev_sector(sect);
49 return 0;
50 }
51 secsize = be16_to_cpu(md->block_size);
52 put_dev_sector(sect);
53 data = read_part_sector(state, secsize/512, &sect);
54 if (!data)
55 return -1;
56 part = (struct mac_partition *) (data + secsize%512);
57 if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) {
58 put_dev_sector(sect);
59 return 0; /* not a MacOS disk */
60 }
61 blocks_in_map = be32_to_cpu(part->map_count);
62 if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) {
63 put_dev_sector(sect);
64 return 0;
65 }
66 strlcat(state->pp_buf, " [mac]", PAGE_SIZE);
67 for (slot = 1; slot <= blocks_in_map; ++slot) {
68 int pos = slot * secsize;
69 put_dev_sector(sect);
70 data = read_part_sector(state, pos/512, &sect);
71 if (!data)
72 return -1;
73 part = (struct mac_partition *) (data + pos%512);
74 if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC)
75 break;
76 put_partition(state, slot,
77 be32_to_cpu(part->start_block) * (secsize/512),
78 be32_to_cpu(part->block_count) * (secsize/512));
79
80 if (!strnicmp(part->type, "Linux_RAID", 10))
81 state->parts[slot].flags = ADDPART_FLAG_RAID;
82#ifdef CONFIG_PPC_PMAC
83 /*
84 * If this is the first bootable partition, tell the
85 * setup code, in case it wants to make this the root.
86 */
87 if (machine_is(powermac)) {
88 int goodness = 0;
89
90 mac_fix_string(part->processor, 16);
91 mac_fix_string(part->name, 32);
92 mac_fix_string(part->type, 32);
93
94 if ((be32_to_cpu(part->status) & MAC_STATUS_BOOTABLE)
95 && strcasecmp(part->processor, "powerpc") == 0)
96 goodness++;
97
98 if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0
99 || (strnicmp(part->type, "Linux", 5) == 0
100 && strcasecmp(part->type, "Linux_swap") != 0)) {
101 int i, l;
102
103 goodness++;
104 l = strlen(part->name);
105 if (strcmp(part->name, "/") == 0)
106 goodness++;
107 for (i = 0; i <= l - 4; ++i) {
108 if (strnicmp(part->name + i, "root",
109 4) == 0) {
110 goodness += 2;
111 break;
112 }
113 }
114 if (strnicmp(part->name, "swap", 4) == 0)
115 goodness--;
116 }
117
118 if (goodness > found_root_goodness) {
119 found_root = slot;
120 found_root_goodness = goodness;
121 }
122 }
123#endif /* CONFIG_PPC_PMAC */
124 }
125#ifdef CONFIG_PPC_PMAC
126 if (found_root_goodness)
127 note_bootable_part(state->bdev->bd_dev, found_root,
128 found_root_goodness);
129#endif
130
131 put_dev_sector(sect);
132 strlcat(state->pp_buf, "\n", PAGE_SIZE);
133 return 1;
134}
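
mac_partition() has to cope with a native block size (secsize) that may be larger than the 512-byte sectors the partition code reads: map entry N lives at byte offset N * secsize, so the code reads sector pos/512 and then looks pos%512 bytes into it. A small standalone sketch of that index arithmetic with an assumed 2048-byte block size:

#include <stdio.h>

int main(void)
{
	unsigned secsize = 2048;		/* assumed native block size */

	for (unsigned slot = 1; slot <= 3; slot++) {
		unsigned pos = slot * secsize;	/* byte offset of map entry 'slot' */
		printf("entry %u: 512-byte sector %u, offset %u within it\n",
		       slot, pos / 512, pos % 512);
	}
	return 0;
}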
diff --git a/fs/partitions/mac.h b/fs/partitions/mac.h
deleted file mode 100644
index 3c7d98436380..000000000000
--- a/fs/partitions/mac.h
+++ /dev/null
@@ -1,44 +0,0 @@
1/*
2 * fs/partitions/mac.h
3 */
4
5#define MAC_PARTITION_MAGIC 0x504d
6
7/* type field value for A/UX or other Unix partitions */
8#define APPLE_AUX_TYPE "Apple_UNIX_SVR2"
9
10struct mac_partition {
11 __be16 signature; /* expected to be MAC_PARTITION_MAGIC */
12 __be16 res1;
13 __be32 map_count; /* # blocks in partition map */
14 __be32 start_block; /* absolute starting block # of partition */
15 __be32 block_count; /* number of blocks in partition */
16 char name[32]; /* partition name */
17 char type[32]; /* string type description */
18 __be32 data_start; /* rel block # of first data block */
19 __be32 data_count; /* number of data blocks */
20 __be32 status; /* partition status bits */
21 __be32 boot_start;
22 __be32 boot_size;
23 __be32 boot_load;
24 __be32 boot_load2;
25 __be32 boot_entry;
26 __be32 boot_entry2;
27 __be32 boot_cksum;
28 char processor[16]; /* identifies ISA of boot */
29 /* there is more stuff after this that we don't need */
30};
31
32#define MAC_STATUS_BOOTABLE 8 /* partition is bootable */
33
34#define MAC_DRIVER_MAGIC 0x4552
35
36/* Driver descriptor structure, in block 0 */
37struct mac_driver_desc {
38 __be16 signature; /* expected to be MAC_DRIVER_MAGIC */
39 __be16 block_size;
40 __be32 block_count;
41 /* ... more stuff */
42};
43
44int mac_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
deleted file mode 100644
index 5f79a6677c69..000000000000
--- a/fs/partitions/msdos.c
+++ /dev/null
@@ -1,552 +0,0 @@
1/*
2 * fs/partitions/msdos.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 * Copyright (C) 1991-1998 Linus Torvalds
6 *
7 * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
8 * in the early extended-partition checks and added DM partitions
9 *
10 * Support for DiskManager v6.0x added by Mark Lord,
11 * with information provided by OnTrack. This now works for linux fdisk
12 * and LILO, as well as loadlin and bootln. Note that disks other than
13 * /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1).
14 *
15 * More flexible handling of extended partitions - aeb, 950831
16 *
17 * Check partition table on IDE disks for common CHS translations
18 *
19 * Re-organised Feb 1998 Russell King
20 */
21#include <linux/msdos_fs.h>
22
23#include "check.h"
24#include "msdos.h"
25#include "efi.h"
26
27/*
28 * Many architectures don't like unaligned accesses, while
29 * the nr_sects and start_sect partition table entries are
30 * at a 2 (mod 4) address.
31 */
32#include <asm/unaligned.h>
33
34#define SYS_IND(p) get_unaligned(&p->sys_ind)
35
36static inline sector_t nr_sects(struct partition *p)
37{
38 return (sector_t)get_unaligned_le32(&p->nr_sects);
39}
40
41static inline sector_t start_sect(struct partition *p)
42{
43 return (sector_t)get_unaligned_le32(&p->start_sect);
44}
45
46static inline int is_extended_partition(struct partition *p)
47{
48 return (SYS_IND(p) == DOS_EXTENDED_PARTITION ||
49 SYS_IND(p) == WIN98_EXTENDED_PARTITION ||
50 SYS_IND(p) == LINUX_EXTENDED_PARTITION);
51}
52
53#define MSDOS_LABEL_MAGIC1 0x55
54#define MSDOS_LABEL_MAGIC2 0xAA
55
56static inline int
57msdos_magic_present(unsigned char *p)
58{
59 return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2);
60}
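
msdos_magic_present() just checks the classic two-byte 0x55 0xAA signature in the last two bytes of the 512-byte boot sector. A minimal userspace sketch doing the same check against a disk image file (hypothetical filename; error handling trimmed to the essentials):

#include <stdio.h>

int main(void)
{
	unsigned char sector[512];
	FILE *f = fopen("disk.img", "rb");	/* hypothetical image file */

	if (!f || fread(sector, 1, sizeof(sector), f) != sizeof(sector)) {
		if (f)
			fclose(f);
		return 1;
	}
	fclose(f);

	if (sector[510] == 0x55 && sector[511] == 0xAA)
		printf("MBR/FAT boot signature present\n");
	else
		printf("no 0x55AA signature\n");
	return 0;
}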
61
62/* Value is EBCDIC 'IBMA' */
63#define AIX_LABEL_MAGIC1 0xC9
64#define AIX_LABEL_MAGIC2 0xC2
65#define AIX_LABEL_MAGIC3 0xD4
66#define AIX_LABEL_MAGIC4 0xC1
67static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
68{
69 struct partition *pt = (struct partition *) (p + 0x1be);
70 Sector sect;
71 unsigned char *d;
72 int slot, ret = 0;
73
74 if (!(p[0] == AIX_LABEL_MAGIC1 &&
75 p[1] == AIX_LABEL_MAGIC2 &&
76 p[2] == AIX_LABEL_MAGIC3 &&
77 p[3] == AIX_LABEL_MAGIC4))
78 return 0;
 79 /* Assume the partition table is valid if Linux partitions exist */
80 for (slot = 1; slot <= 4; slot++, pt++) {
81 if (pt->sys_ind == LINUX_SWAP_PARTITION ||
82 pt->sys_ind == LINUX_RAID_PARTITION ||
83 pt->sys_ind == LINUX_DATA_PARTITION ||
84 pt->sys_ind == LINUX_LVM_PARTITION ||
85 is_extended_partition(pt))
86 return 0;
87 }
88 d = read_part_sector(state, 7, &sect);
89 if (d) {
90 if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
91 ret = 1;
92 put_dev_sector(sect);
 93 }
94 return ret;
95}
96
97/*
98 * Create devices for each logical partition in an extended partition.
99 * The logical partitions form a linked list, with each entry being
100 * a partition table with two entries. The first entry
101 * is the real data partition (with a start relative to the partition
102 * table start). The second is a pointer to the next logical partition
103 * (with a start relative to the entire extended partition).
104 * We do not create a Linux partition for the partition tables, but
105 * only for the actual data partitions.
106 */
107
108static void parse_extended(struct parsed_partitions *state,
109 sector_t first_sector, sector_t first_size)
110{
111 struct partition *p;
112 Sector sect;
113 unsigned char *data;
114 sector_t this_sector, this_size;
115 sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
116 int loopct = 0; /* number of links followed
117 without finding a data partition */
118 int i;
119
120 this_sector = first_sector;
121 this_size = first_size;
122
123 while (1) {
124 if (++loopct > 100)
125 return;
126 if (state->next == state->limit)
127 return;
128 data = read_part_sector(state, this_sector, &sect);
129 if (!data)
130 return;
131
132 if (!msdos_magic_present(data + 510))
133 goto done;
134
135 p = (struct partition *) (data + 0x1be);
136
137 /*
138 * Usually, the first entry is the real data partition,
139 * the 2nd entry is the next extended partition, or empty,
140 * and the 3rd and 4th entries are unused.
141 * However, DRDOS sometimes has the extended partition as
142 * the first entry (when the data partition is empty),
143 * and OS/2 seems to use all four entries.
144 */
145
146 /*
147 * First process the data partition(s)
148 */
149 for (i=0; i<4; i++, p++) {
150 sector_t offs, size, next;
151 if (!nr_sects(p) || is_extended_partition(p))
152 continue;
153
154 /* Check the 3rd and 4th entries -
155 these sometimes contain random garbage */
156 offs = start_sect(p)*sector_size;
157 size = nr_sects(p)*sector_size;
158 next = this_sector + offs;
159 if (i >= 2) {
160 if (offs + size > this_size)
161 continue;
162 if (next < first_sector)
163 continue;
164 if (next + size > first_sector + first_size)
165 continue;
166 }
167
168 put_partition(state, state->next, next, size);
169 if (SYS_IND(p) == LINUX_RAID_PARTITION)
170 state->parts[state->next].flags = ADDPART_FLAG_RAID;
171 loopct = 0;
172 if (++state->next == state->limit)
173 goto done;
174 }
175 /*
176 * Next, process the (first) extended partition, if present.
177 * (So far, there seems to be no reason to make
178 * parse_extended() recursive and allow a tree
179 * of extended partitions.)
180 * It should be a link to the next logical partition.
181 */
182 p -= 4;
183 for (i=0; i<4; i++, p++)
184 if (nr_sects(p) && is_extended_partition(p))
185 break;
186 if (i == 4)
187 goto done; /* nothing left to do */
188
189 this_sector = first_sector + start_sect(p) * sector_size;
190 this_size = nr_sects(p) * sector_size;
191 put_dev_sector(sect);
192 }
193done:
194 put_dev_sector(sect);
195}
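
The comment before parse_extended() describes the on-disk shape: each link in the chain is a tiny partition table whose first entry is a data partition (offset relative to that table) and whose second entry points at the next link (offset relative to the whole extended partition). A simplified, self-contained model of that walk over an in-memory chain (hypothetical types, no disk I/O, bounded like the kernel loop):

#include <stdio.h>
#include <stddef.h>

struct ebr {			/* one link of the chain, already decoded */
	unsigned long data_start;	/* relative to this EBR                      */
	unsigned long data_size;
	unsigned long next_start;	/* relative to the extended partition; 0 = end */
};

static void walk_extended(const struct ebr *chain, size_t nlinks,
			  unsigned long ext_start)
{
	unsigned long this_sector = ext_start;
	size_t idx = 0;
	int loopct = 0;

	while (idx < nlinks) {
		if (++loopct > 100)	/* same guard as the kernel against cycles */
			return;

		const struct ebr *e = &chain[idx];
		printf("logical partition at %lu, %lu sectors\n",
		       this_sector + e->data_start, e->data_size);

		if (!e->next_start)
			return;
		this_sector = ext_start + e->next_start;	/* follow the link */
		idx++;
	}
}

int main(void)
{
	struct ebr chain[] = {
		{ 63, 1000, 2000 },
		{ 63, 3000, 0    },
	};

	walk_extended(chain, 2, 10000);
	return 0;
}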
196
197/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also
198 indicates linux swap. Be careful before believing this is Solaris. */
199
200static void parse_solaris_x86(struct parsed_partitions *state,
201 sector_t offset, sector_t size, int origin)
202{
203#ifdef CONFIG_SOLARIS_X86_PARTITION
204 Sector sect;
205 struct solaris_x86_vtoc *v;
206 int i;
207 short max_nparts;
208
209 v = read_part_sector(state, offset + 1, &sect);
210 if (!v)
211 return;
212 if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) {
213 put_dev_sector(sect);
214 return;
215 }
216 {
217 char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1];
218
219 snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin);
220 strlcat(state->pp_buf, tmp, PAGE_SIZE);
221 }
222 if (le32_to_cpu(v->v_version) != 1) {
223 char tmp[64];
224
225 snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n",
226 le32_to_cpu(v->v_version));
227 strlcat(state->pp_buf, tmp, PAGE_SIZE);
228 put_dev_sector(sect);
229 return;
230 }
231 /* Ensure we can handle previous case of VTOC with 8 entries gracefully */
232 max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8;
233 for (i=0; i<max_nparts && state->next<state->limit; i++) {
234 struct solaris_x86_slice *s = &v->v_slice[i];
235 char tmp[3 + 10 + 1 + 1];
236
237 if (s->s_size == 0)
238 continue;
239 snprintf(tmp, sizeof(tmp), " [s%d]", i);
240 strlcat(state->pp_buf, tmp, PAGE_SIZE);
241 /* solaris partitions are relative to current MS-DOS
242 * one; must add the offset of the current partition */
243 put_partition(state, state->next++,
244 le32_to_cpu(s->s_start)+offset,
245 le32_to_cpu(s->s_size));
246 }
247 put_dev_sector(sect);
248 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
249#endif
250}
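
As the comment in the loop notes, Solaris slice offsets are relative to the enclosing DOS partition, so every slice start has to be rebased before it is reported. The same one-line calculation, shown standalone with invented numbers:

#include <stdio.h>

int main(void)
{
	unsigned long dos_offset  = 16065;	/* start of the DOS partition holding the vtoc */
	unsigned long slice_start = 2048;	/* slice start, relative to that partition     */
	unsigned long slice_size  = 409600;

	printf("slice occupies absolute sectors %lu..%lu\n",
	       dos_offset + slice_start,
	       dos_offset + slice_start + slice_size - 1);
	return 0;
}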
251
252#if defined(CONFIG_BSD_DISKLABEL)
253/*
254 * Create devices for BSD partitions listed in a disklabel, under a
255 * dos-like partition. See parse_extended() for more information.
256 */
257static void parse_bsd(struct parsed_partitions *state,
258 sector_t offset, sector_t size, int origin, char *flavour,
259 int max_partitions)
260{
261 Sector sect;
262 struct bsd_disklabel *l;
263 struct bsd_partition *p;
264 char tmp[64];
265
266 l = read_part_sector(state, offset + 1, &sect);
267 if (!l)
268 return;
269 if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) {
270 put_dev_sector(sect);
271 return;
272 }
273
274 snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour);
275 strlcat(state->pp_buf, tmp, PAGE_SIZE);
276
277 if (le16_to_cpu(l->d_npartitions) < max_partitions)
278 max_partitions = le16_to_cpu(l->d_npartitions);
279 for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) {
280 sector_t bsd_start, bsd_size;
281
282 if (state->next == state->limit)
283 break;
284 if (p->p_fstype == BSD_FS_UNUSED)
285 continue;
286 bsd_start = le32_to_cpu(p->p_offset);
287 bsd_size = le32_to_cpu(p->p_size);
288 if (offset == bsd_start && size == bsd_size)
289 /* full parent partition, we have it already */
290 continue;
291 if (offset > bsd_start || offset+size < bsd_start+bsd_size) {
292 strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE);
293 continue;
294 }
295 put_partition(state, state->next++, bsd_start, bsd_size);
296 }
297 put_dev_sector(sect);
298 if (le16_to_cpu(l->d_npartitions) > max_partitions) {
299 snprintf(tmp, sizeof(tmp), " (ignored %d more)",
300 le16_to_cpu(l->d_npartitions) - max_partitions);
301 strlcat(state->pp_buf, tmp, PAGE_SIZE);
302 }
303 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
304}
305#endif
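
The containment test in parse_bsd() is what keeps a nested BSD slice honest: a slice identical to its DOS parent is skipped, and a slice that falls partly outside the parent is rejected. A hedged sketch of that check in isolation (plain C, sector counts only):

#include <stdio.h>

typedef unsigned long long sect_t;

/* Returns 1 if [start, start+size) lies entirely inside the parent. */
static int inside_parent(sect_t p_off, sect_t p_size,
			 sect_t start, sect_t size)
{
	return start >= p_off && start + size <= p_off + p_size;
}

int main(void)
{
	sect_t parent_off = 2048, parent_size = 1000000;

	printf("%d\n", inside_parent(parent_off, parent_size, 4096, 500000));	/* 1 */
	printf("%d\n", inside_parent(parent_off, parent_size, 1000, 500000));	/* 0: starts before parent */
	printf("%d\n", inside_parent(parent_off, parent_size, 900000, 200000));	/* 0: runs past the end */
	return 0;
}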
306
307static void parse_freebsd(struct parsed_partitions *state,
308 sector_t offset, sector_t size, int origin)
309{
310#ifdef CONFIG_BSD_DISKLABEL
311 parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS);
312#endif
313}
314
315static void parse_netbsd(struct parsed_partitions *state,
316 sector_t offset, sector_t size, int origin)
317{
318#ifdef CONFIG_BSD_DISKLABEL
319 parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS);
320#endif
321}
322
323static void parse_openbsd(struct parsed_partitions *state,
324 sector_t offset, sector_t size, int origin)
325{
326#ifdef CONFIG_BSD_DISKLABEL
327 parse_bsd(state, offset, size, origin, "openbsd",
328 OPENBSD_MAXPARTITIONS);
329#endif
330}
331
332/*
333 * Create devices for Unixware partitions listed in a disklabel, under a
334 * dos-like partition. See parse_extended() for more information.
335 */
336static void parse_unixware(struct parsed_partitions *state,
337 sector_t offset, sector_t size, int origin)
338{
339#ifdef CONFIG_UNIXWARE_DISKLABEL
340 Sector sect;
341 struct unixware_disklabel *l;
342 struct unixware_slice *p;
343
344 l = read_part_sector(state, offset + 29, &sect);
345 if (!l)
346 return;
347 if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC ||
348 le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) {
349 put_dev_sector(sect);
350 return;
351 }
352 {
353 char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1];
354
355 snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin);
356 strlcat(state->pp_buf, tmp, PAGE_SIZE);
357 }
358 p = &l->vtoc.v_slice[1];
359 /* I omit the 0th slice as it is the same as whole disk. */
360 while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) {
361 if (state->next == state->limit)
362 break;
363
364 if (p->s_label != UNIXWARE_FS_UNUSED)
365 put_partition(state, state->next++,
366 le32_to_cpu(p->start_sect),
367 le32_to_cpu(p->nr_sects));
368 p++;
369 }
370 put_dev_sector(sect);
371 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
372#endif
373}
374
375/*
376 * Minix 2.0.0/2.0.2 subpartition support.
377 * Anand Krishnamurthy <anandk@wiproge.med.ge.com>
378 * Rajeev V. Pillai <rajeevvp@yahoo.com>
379 */
380static void parse_minix(struct parsed_partitions *state,
381 sector_t offset, sector_t size, int origin)
382{
383#ifdef CONFIG_MINIX_SUBPARTITION
384 Sector sect;
385 unsigned char *data;
386 struct partition *p;
387 int i;
388
389 data = read_part_sector(state, offset, &sect);
390 if (!data)
391 return;
392
393 p = (struct partition *)(data + 0x1be);
394
395 /* The first sector of a Minix partition can have either
396 * a secondary MBR describing its subpartitions, or
397 * the normal boot sector. */
398 if (msdos_magic_present (data + 510) &&
399 SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */
400 char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1];
401
402 snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin);
403 strlcat(state->pp_buf, tmp, PAGE_SIZE);
404 for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) {
405 if (state->next == state->limit)
406 break;
407 /* add each partition in use */
408 if (SYS_IND(p) == MINIX_PARTITION)
409 put_partition(state, state->next++,
410 start_sect(p), nr_sects(p));
411 }
412 strlcat(state->pp_buf, " >\n", PAGE_SIZE);
413 }
414 put_dev_sector(sect);
415#endif /* CONFIG_MINIX_SUBPARTITION */
416}
417
418static struct {
419 unsigned char id;
420 void (*parse)(struct parsed_partitions *, sector_t, sector_t, int);
421} subtypes[] = {
422 {FREEBSD_PARTITION, parse_freebsd},
423 {NETBSD_PARTITION, parse_netbsd},
424 {OPENBSD_PARTITION, parse_openbsd},
425 {MINIX_PARTITION, parse_minix},
426 {UNIXWARE_PARTITION, parse_unixware},
427 {SOLARIS_X86_PARTITION, parse_solaris_x86},
428 {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86},
429 {0, NULL},
430};
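
The subtypes[] table pairs a partition id with a parser and is terminated by a {0, NULL} sentinel; the second pass in msdos_partition() just scans it linearly for a matching id. The same pattern in a self-contained sketch (hypothetical handler names):

#include <stdio.h>

static void parse_foo(int slot) { printf("foo parser for slot %d\n", slot); }
static void parse_bar(int slot) { printf("bar parser for slot %d\n", slot); }

static struct {
	unsigned char id;
	void (*parse)(int slot);
} handlers[] = {
	{ 0xA5, parse_foo },
	{ 0x63, parse_bar },
	{ 0, NULL },			/* sentinel ends the table */
};

static void dispatch(unsigned char id, int slot)
{
	int n;

	for (n = 0; handlers[n].parse && id != handlers[n].id; n++)
		;
	if (handlers[n].parse)
		handlers[n].parse(slot);	/* found a matching handler */
}

int main(void)
{
	dispatch(0xA5, 1);	/* runs parse_foo */
	dispatch(0x07, 2);	/* no handler, silently ignored */
	return 0;
}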
431
432int msdos_partition(struct parsed_partitions *state)
433{
434 sector_t sector_size = bdev_logical_block_size(state->bdev) / 512;
435 Sector sect;
436 unsigned char *data;
437 struct partition *p;
438 struct fat_boot_sector *fb;
439 int slot;
440
441 data = read_part_sector(state, 0, &sect);
442 if (!data)
443 return -1;
444 if (!msdos_magic_present(data + 510)) {
445 put_dev_sector(sect);
446 return 0;
447 }
448
449 if (aix_magic_present(state, data)) {
450 put_dev_sector(sect);
451 strlcat(state->pp_buf, " [AIX]", PAGE_SIZE);
452 return 0;
453 }
454
455 /*
456 * Now that the 55aa signature is present, this is probably
457 * either the boot sector of a FAT filesystem or a DOS-type
 458 * partition table. Reject it if the boot indicator
459 * is not 0 or 0x80.
460 */
461 p = (struct partition *) (data + 0x1be);
462 for (slot = 1; slot <= 4; slot++, p++) {
463 if (p->boot_ind != 0 && p->boot_ind != 0x80) {
464 /*
 465 * Even without a valid boot indicator value
 466 * it's still possible this is a valid FAT filesystem
467 * without a partition table.
468 */
469 fb = (struct fat_boot_sector *) data;
470 if (slot == 1 && fb->reserved && fb->fats
471 && fat_valid_media(fb->media)) {
472 strlcat(state->pp_buf, "\n", PAGE_SIZE);
473 put_dev_sector(sect);
474 return 1;
475 } else {
476 put_dev_sector(sect);
477 return 0;
478 }
479 }
480 }
481
482#ifdef CONFIG_EFI_PARTITION
483 p = (struct partition *) (data + 0x1be);
484 for (slot = 1 ; slot <= 4 ; slot++, p++) {
485 /* If this is an EFI GPT disk, msdos should ignore it. */
486 if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) {
487 put_dev_sector(sect);
488 return 0;
489 }
490 }
491#endif
492 p = (struct partition *) (data + 0x1be);
493
494 /*
495 * Look for partitions in two passes:
496 * First find the primary and DOS-type extended partitions.
497 * On the second pass look inside *BSD, Unixware and Solaris partitions.
498 */
499
500 state->next = 5;
501 for (slot = 1 ; slot <= 4 ; slot++, p++) {
502 sector_t start = start_sect(p)*sector_size;
503 sector_t size = nr_sects(p)*sector_size;
504 if (!size)
505 continue;
506 if (is_extended_partition(p)) {
507 /*
508 * prevent someone doing mkfs or mkswap on an
509 * extended partition, but leave room for LILO
510 * FIXME: this uses one logical sector for > 512b
511 * sector, although it may not be enough/proper.
512 */
513 sector_t n = 2;
514 n = min(size, max(sector_size, n));
515 put_partition(state, slot, start, n);
516
517 strlcat(state->pp_buf, " <", PAGE_SIZE);
518 parse_extended(state, start, size);
519 strlcat(state->pp_buf, " >", PAGE_SIZE);
520 continue;
521 }
522 put_partition(state, slot, start, size);
523 if (SYS_IND(p) == LINUX_RAID_PARTITION)
524 state->parts[slot].flags = ADDPART_FLAG_RAID;
525 if (SYS_IND(p) == DM6_PARTITION)
526 strlcat(state->pp_buf, "[DM]", PAGE_SIZE);
527 if (SYS_IND(p) == EZD_PARTITION)
528 strlcat(state->pp_buf, "[EZD]", PAGE_SIZE);
529 }
530
531 strlcat(state->pp_buf, "\n", PAGE_SIZE);
532
533 /* second pass - output for each on a separate line */
534 p = (struct partition *) (0x1be + data);
535 for (slot = 1 ; slot <= 4 ; slot++, p++) {
536 unsigned char id = SYS_IND(p);
537 int n;
538
539 if (!nr_sects(p))
540 continue;
541
542 for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++)
543 ;
544
545 if (!subtypes[n].parse)
546 continue;
547 subtypes[n].parse(state, start_sect(p) * sector_size,
548 nr_sects(p) * sector_size, slot);
549 }
550 put_dev_sector(sect);
551 return 1;
552}
diff --git a/fs/partitions/msdos.h b/fs/partitions/msdos.h
deleted file mode 100644
index 38c781c490b3..000000000000
--- a/fs/partitions/msdos.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/msdos.h
3 */
4
5#define MSDOS_LABEL_MAGIC 0xAA55
6
7int msdos_partition(struct parsed_partitions *state);
8
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
deleted file mode 100644
index 764b86a01965..000000000000
--- a/fs/partitions/osf.c
+++ /dev/null
@@ -1,86 +0,0 @@
1/*
2 * fs/partitions/osf.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include "check.h"
11#include "osf.h"
12
13#define MAX_OSF_PARTITIONS 18
14
15int osf_partition(struct parsed_partitions *state)
16{
17 int i;
18 int slot = 1;
19 unsigned int npartitions;
20 Sector sect;
21 unsigned char *data;
22 struct disklabel {
23 __le32 d_magic;
24 __le16 d_type,d_subtype;
25 u8 d_typename[16];
26 u8 d_packname[16];
27 __le32 d_secsize;
28 __le32 d_nsectors;
29 __le32 d_ntracks;
30 __le32 d_ncylinders;
31 __le32 d_secpercyl;
32 __le32 d_secprtunit;
33 __le16 d_sparespertrack;
34 __le16 d_sparespercyl;
35 __le32 d_acylinders;
36 __le16 d_rpm, d_interleave, d_trackskew, d_cylskew;
37 __le32 d_headswitch, d_trkseek, d_flags;
38 __le32 d_drivedata[5];
39 __le32 d_spare[5];
40 __le32 d_magic2;
41 __le16 d_checksum;
42 __le16 d_npartitions;
43 __le32 d_bbsize, d_sbsize;
44 struct d_partition {
45 __le32 p_size;
46 __le32 p_offset;
47 __le32 p_fsize;
48 u8 p_fstype;
49 u8 p_frag;
50 __le16 p_cpg;
51 } d_partitions[MAX_OSF_PARTITIONS];
52 } * label;
53 struct d_partition * partition;
54
55 data = read_part_sector(state, 0, &sect);
56 if (!data)
57 return -1;
58
59 label = (struct disklabel *) (data+64);
60 partition = label->d_partitions;
61 if (le32_to_cpu(label->d_magic) != DISKLABELMAGIC) {
62 put_dev_sector(sect);
63 return 0;
64 }
65 if (le32_to_cpu(label->d_magic2) != DISKLABELMAGIC) {
66 put_dev_sector(sect);
67 return 0;
68 }
69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
75 if (slot == state->limit)
76 break;
77 if (le32_to_cpu(partition->p_size))
78 put_partition(state, slot,
79 le32_to_cpu(partition->p_offset),
80 le32_to_cpu(partition->p_size));
81 slot++;
82 }
83 strlcat(state->pp_buf, "\n", PAGE_SIZE);
84 put_dev_sector(sect);
85 return 1;
86}
diff --git a/fs/partitions/osf.h b/fs/partitions/osf.h
deleted file mode 100644
index 20ed2315ec16..000000000000
--- a/fs/partitions/osf.h
+++ /dev/null
@@ -1,7 +0,0 @@
1/*
2 * fs/partitions/osf.h
3 */
4
5#define DISKLABELMAGIC (0x82564557UL)
6
7int osf_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c
deleted file mode 100644
index ea8a86dceaf4..000000000000
--- a/fs/partitions/sgi.c
+++ /dev/null
@@ -1,82 +0,0 @@
1/*
2 * fs/partitions/sgi.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 */
6
7#include "check.h"
8#include "sgi.h"
9
10struct sgi_disklabel {
11 __be32 magic_mushroom; /* Big fat spliff... */
12 __be16 root_part_num; /* Root partition number */
13 __be16 swap_part_num; /* Swap partition number */
14 s8 boot_file[16]; /* Name of boot file for ARCS */
15 u8 _unused0[48]; /* Device parameter useless crapola.. */
16 struct sgi_volume {
17 s8 name[8]; /* Name of volume */
18 __be32 block_num; /* Logical block number */
19 __be32 num_bytes; /* How big, in bytes */
20 } volume[15];
21 struct sgi_partition {
22 __be32 num_blocks; /* Size in logical blocks */
23 __be32 first_block; /* First logical block */
24 __be32 type; /* Type of this partition */
25 } partitions[16];
26 __be32 csum; /* Disk label checksum */
27 __be32 _unused1; /* Padding */
28};
29
30int sgi_partition(struct parsed_partitions *state)
31{
32 int i, csum;
33 __be32 magic;
34 int slot = 1;
35 unsigned int start, blocks;
36 __be32 *ui, cs;
37 Sector sect;
38 struct sgi_disklabel *label;
39 struct sgi_partition *p;
40 char b[BDEVNAME_SIZE];
41
42 label = read_part_sector(state, 0, &sect);
43 if (!label)
44 return -1;
45 p = &label->partitions[0];
46 magic = label->magic_mushroom;
47 if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) {
48 /*printk("Dev %s SGI disklabel: bad magic %08x\n",
49 bdevname(bdev, b), be32_to_cpu(magic));*/
50 put_dev_sector(sect);
51 return 0;
52 }
53 ui = ((__be32 *) (label + 1)) - 1;
54 for(csum = 0; ui >= ((__be32 *) label);) {
55 cs = *ui--;
56 csum += be32_to_cpu(cs);
57 }
58 if(csum) {
59 printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n",
60 bdevname(state->bdev, b));
61 put_dev_sector(sect);
62 return 0;
63 }
 64 /* All SGI disk labels have 16 partitions; disks under Linux only
 65 * have 15 minors. Luckily there are always a few zero-length
66 * partitions which we don't care about so we never overflow the
67 * current_minor.
68 */
69 for(i = 0; i < 16; i++, p++) {
70 blocks = be32_to_cpu(p->num_blocks);
71 start = be32_to_cpu(p->first_block);
72 if (blocks) {
73 put_partition(state, slot, start, blocks);
74 if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION)
75 state->parts[slot].flags = ADDPART_FLAG_RAID;
76 }
77 slot++;
78 }
79 strlcat(state->pp_buf, "\n", PAGE_SIZE);
80 put_dev_sector(sect);
81 return 1;
82}
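
The checksum loop in sgi_partition() walks the whole label backwards as big-endian 32-bit words and requires the sum to be zero (the on-disk csum field is chosen so that everything cancels out). A standalone sketch of the same verification over an in-memory buffer (synthetic data; byte-swapping done by hand instead of be32_to_cpu):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t get_be32(const unsigned char *p)
{
	return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
	       ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
}

static void put_be32(unsigned char *p, uint32_t v)
{
	p[0] = (unsigned char)(v >> 24);
	p[1] = (unsigned char)(v >> 16);
	p[2] = (unsigned char)(v >> 8);
	p[3] = (unsigned char)v;
}

/* A label checks out if its 32-bit words sum to zero (mod 2^32). */
static int label_csum_ok(const unsigned char *label, size_t len)
{
	uint32_t sum = 0;

	for (size_t i = 0; i < len; i += 4)
		sum += get_be32(label + i);
	return sum == 0;
}

int main(void)
{
	unsigned char label[16];
	uint32_t v = 0x0be5a941;		/* SGI_LABEL_MAGIC, for flavour */

	memset(label, 0, sizeof(label));
	put_be32(label, v);
	put_be32(label + 4, -v);		/* negation makes the sum cancel */

	printf("checksum ok: %d\n", label_csum_ok(label, sizeof(label)));	/* 1 */
	return 0;
}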
diff --git a/fs/partitions/sgi.h b/fs/partitions/sgi.h
deleted file mode 100644
index b9553ebdd5a9..000000000000
--- a/fs/partitions/sgi.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/sgi.h
3 */
4
5extern int sgi_partition(struct parsed_partitions *state);
6
7#define SGI_LABEL_MAGIC 0x0be5a941
8
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
deleted file mode 100644
index b5b6fcfb3d36..000000000000
--- a/fs/partitions/sun.c
+++ /dev/null
@@ -1,122 +0,0 @@
1/*
2 * fs/partitions/sun.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Copyright (C) 1991-1998 Linus Torvalds
7 * Re-organised Feb 1998 Russell King
8 */
9
10#include "check.h"
11#include "sun.h"
12
13int sun_partition(struct parsed_partitions *state)
14{
15 int i;
16 __be16 csum;
17 int slot = 1;
18 __be16 *ush;
19 Sector sect;
20 struct sun_disklabel {
21 unsigned char info[128]; /* Informative text string */
22 struct sun_vtoc {
23 __be32 version; /* Layout version */
24 char volume[8]; /* Volume name */
25 __be16 nparts; /* Number of partitions */
26 struct sun_info { /* Partition hdrs, sec 2 */
27 __be16 id;
28 __be16 flags;
29 } infos[8];
30 __be16 padding; /* Alignment padding */
31 __be32 bootinfo[3]; /* Info needed by mboot */
32 __be32 sanity; /* To verify vtoc sanity */
33 __be32 reserved[10]; /* Free space */
34 __be32 timestamp[8]; /* Partition timestamp */
35 } vtoc;
36 __be32 write_reinstruct; /* sectors to skip, writes */
37 __be32 read_reinstruct; /* sectors to skip, reads */
38 unsigned char spare[148]; /* Padding */
39 __be16 rspeed; /* Disk rotational speed */
40 __be16 pcylcount; /* Physical cylinder count */
41 __be16 sparecyl; /* extra sects per cylinder */
42 __be16 obs1; /* gap1 */
43 __be16 obs2; /* gap2 */
44 __be16 ilfact; /* Interleave factor */
45 __be16 ncyl; /* Data cylinder count */
46 __be16 nacyl; /* Alt. cylinder count */
47 __be16 ntrks; /* Tracks per cylinder */
48 __be16 nsect; /* Sectors per track */
49 __be16 obs3; /* bhead - Label head offset */
50 __be16 obs4; /* ppart - Physical Partition */
51 struct sun_partition {
52 __be32 start_cylinder;
53 __be32 num_sectors;
54 } partitions[8];
55 __be16 magic; /* Magic number */
56 __be16 csum; /* Label xor'd checksum */
57 } * label;
58 struct sun_partition *p;
59 unsigned long spc;
60 char b[BDEVNAME_SIZE];
61 int use_vtoc;
62 int nparts;
63
64 label = read_part_sector(state, 0, &sect);
65 if (!label)
66 return -1;
67
68 p = label->partitions;
69 if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) {
70/* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n",
71 bdevname(bdev, b), be16_to_cpu(label->magic)); */
72 put_dev_sector(sect);
73 return 0;
74 }
75 /* Look at the checksum */
76 ush = ((__be16 *) (label+1)) - 1;
77 for (csum = 0; ush >= ((__be16 *) label);)
78 csum ^= *ush--;
79 if (csum) {
80 printk("Dev %s Sun disklabel: Csum bad, label corrupted\n",
81 bdevname(state->bdev, b));
82 put_dev_sector(sect);
83 return 0;
84 }
85
86 /* Check to see if we can use the VTOC table */
87 use_vtoc = ((be32_to_cpu(label->vtoc.sanity) == SUN_VTOC_SANITY) &&
88 (be32_to_cpu(label->vtoc.version) == 1) &&
89 (be16_to_cpu(label->vtoc.nparts) <= 8));
90
91 /* Use 8 partition entries if not specified in validated VTOC */
92 nparts = (use_vtoc) ? be16_to_cpu(label->vtoc.nparts) : 8;
93
94 /*
95 * So that old Linux-Sun partitions continue to work,
 96 * allow the VTOC to be used under the additional condition ...
97 */
98 use_vtoc = use_vtoc || !(label->vtoc.sanity ||
99 label->vtoc.version || label->vtoc.nparts);
100 spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
101 for (i = 0; i < nparts; i++, p++) {
102 unsigned long st_sector;
103 unsigned int num_sectors;
104
105 st_sector = be32_to_cpu(p->start_cylinder) * spc;
106 num_sectors = be32_to_cpu(p->num_sectors);
107 if (num_sectors) {
108 put_partition(state, slot, st_sector, num_sectors);
109 state->parts[slot].flags = 0;
110 if (use_vtoc) {
111 if (be16_to_cpu(label->vtoc.infos[i].id) == LINUX_RAID_PARTITION)
112 state->parts[slot].flags |= ADDPART_FLAG_RAID;
113 else if (be16_to_cpu(label->vtoc.infos[i].id) == SUN_WHOLE_DISK)
114 state->parts[slot].flags |= ADDPART_FLAG_WHOLEDISK;
115 }
116 }
117 slot++;
118 }
119 strlcat(state->pp_buf, "\n", PAGE_SIZE);
120 put_dev_sector(sect);
121 return 1;
122}
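
Two details in sun_partition() are worth pulling out: the label checksum is an XOR over all 16-bit words (so a valid label XORs to zero), and partition starts are stored in cylinders and converted to sectors with spc = ntrks * nsect. A small standalone sketch of both calculations with made-up geometry:

#include <stdint.h>
#include <stdio.h>

/* XOR of all 16-bit words must be zero for a valid label. */
static int xor_csum_ok(const uint16_t *words, size_t n)
{
	uint16_t csum = 0;

	while (n--)
		csum ^= *words++;
	return csum == 0;
}

int main(void)
{
	/* Toy "label": last word is chosen so the XOR cancels to zero. */
	uint16_t label[4] = { 0xDABE, 0x1234, 0x00FF, 0 };
	label[3] = label[0] ^ label[1] ^ label[2];
	printf("checksum ok: %d\n", xor_csum_ok(label, 4));	/* 1 */

	/* Cylinder-to-sector conversion, made-up geometry. */
	unsigned ntrks = 16, nsect = 63;	/* tracks/cyl, sectors/track */
	unsigned long spc = (unsigned long)ntrks * nsect;
	unsigned long start_cylinder = 100;
	printf("partition starts at sector %lu\n", start_cylinder * spc);
	return 0;
}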
diff --git a/fs/partitions/sun.h b/fs/partitions/sun.h
deleted file mode 100644
index 2424baa8319f..000000000000
--- a/fs/partitions/sun.h
+++ /dev/null
@@ -1,8 +0,0 @@
1/*
2 * fs/partitions/sun.h
3 */
4
5#define SUN_LABEL_MAGIC 0xDABE
6#define SUN_VTOC_SANITY 0x600DDEEE
7
8int sun_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/sysv68.c b/fs/partitions/sysv68.c
deleted file mode 100644
index 9627ccffc1c4..000000000000
--- a/fs/partitions/sysv68.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * fs/partitions/sysv68.c
3 *
4 * Copyright (C) 2007 Philippe De Muyter <phdm@macqel.be>
5 */
6
7#include "check.h"
8#include "sysv68.h"
9
10/*
 11 * Volume ID structure: in the first 256-byte sector of the disk
12 */
13
14struct volumeid {
15 u8 vid_unused[248];
16 u8 vid_mac[8]; /* ASCII string "MOTOROLA" */
17};
18
19/*
 20 * config block: the second 256-byte sector of the disk
21 */
22
23struct dkconfig {
24 u8 ios_unused0[128];
25 __be32 ios_slcblk; /* Slice table block number */
26 __be16 ios_slccnt; /* Number of entries in slice table */
27 u8 ios_unused1[122];
28};
29
30/*
31 * combined volumeid and dkconfig block
32 */
33
34struct dkblk0 {
35 struct volumeid dk_vid;
36 struct dkconfig dk_ios;
37};
38
39/*
40 * Slice Table Structure
41 */
42
43struct slice {
44 __be32 nblocks; /* slice size (in blocks) */
45 __be32 blkoff; /* block offset of slice */
46};
47
48
49int sysv68_partition(struct parsed_partitions *state)
50{
51 int i, slices;
52 int slot = 1;
53 Sector sect;
54 unsigned char *data;
55 struct dkblk0 *b;
56 struct slice *slice;
57 char tmp[64];
58
59 data = read_part_sector(state, 0, &sect);
60 if (!data)
61 return -1;
62
63 b = (struct dkblk0 *)data;
64 if (memcmp(b->dk_vid.vid_mac, "MOTOROLA", sizeof(b->dk_vid.vid_mac))) {
65 put_dev_sector(sect);
66 return 0;
67 }
68 slices = be16_to_cpu(b->dk_ios.ios_slccnt);
69 i = be32_to_cpu(b->dk_ios.ios_slcblk);
70 put_dev_sector(sect);
71
72 data = read_part_sector(state, i, &sect);
73 if (!data)
74 return -1;
75
76 slices -= 1; /* last slice is the whole disk */
77 snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices);
78 strlcat(state->pp_buf, tmp, PAGE_SIZE);
79 slice = (struct slice *)data;
80 for (i = 0; i < slices; i++, slice++) {
81 if (slot == state->limit)
82 break;
83 if (be32_to_cpu(slice->nblocks)) {
84 put_partition(state, slot,
85 be32_to_cpu(slice->blkoff),
86 be32_to_cpu(slice->nblocks));
87 snprintf(tmp, sizeof(tmp), "(s%u)", i);
88 strlcat(state->pp_buf, tmp, PAGE_SIZE);
89 }
90 slot++;
91 }
92 strlcat(state->pp_buf, "\n", PAGE_SIZE);
93 put_dev_sector(sect);
94 return 1;
95}
diff --git a/fs/partitions/sysv68.h b/fs/partitions/sysv68.h
deleted file mode 100644
index bf2f5ffa97ac..000000000000
--- a/fs/partitions/sysv68.h
+++ /dev/null
@@ -1 +0,0 @@
1extern int sysv68_partition(struct parsed_partitions *state);
diff --git a/fs/partitions/ultrix.c b/fs/partitions/ultrix.c
deleted file mode 100644
index 8dbaf9f77a99..000000000000
--- a/fs/partitions/ultrix.c
+++ /dev/null
@@ -1,48 +0,0 @@
1/*
2 * fs/partitions/ultrix.c
3 *
4 * Code extracted from drivers/block/genhd.c
5 *
6 * Re-organised Jul 1999 Russell King
7 */
8
9#include "check.h"
10#include "ultrix.h"
11
12int ultrix_partition(struct parsed_partitions *state)
13{
14 int i;
15 Sector sect;
16 unsigned char *data;
17 struct ultrix_disklabel {
 18 s32 pt_magic; /* magic no. indicating part. info exists */
19 s32 pt_valid; /* set by driver if pt is current */
20 struct pt_info {
21 s32 pi_nblocks; /* no. of sectors */
22 u32 pi_blkoff; /* block offset for start */
23 } pt_part[8];
24 } *label;
25
26#define PT_MAGIC 0x032957 /* Partition magic number */
27#define PT_VALID 1 /* Indicates if struct is valid */
28
29 data = read_part_sector(state, (16384 - sizeof(*label))/512, &sect);
30 if (!data)
31 return -1;
32
33 label = (struct ultrix_disklabel *)(data + 512 - sizeof(*label));
34
35 if (label->pt_magic == PT_MAGIC && label->pt_valid == PT_VALID) {
36 for (i=0; i<8; i++)
37 if (label->pt_part[i].pi_nblocks)
38 put_partition(state, i+1,
39 label->pt_part[i].pi_blkoff,
40 label->pt_part[i].pi_nblocks);
41 put_dev_sector(sect);
42 strlcat(state->pp_buf, "\n", PAGE_SIZE);
43 return 1;
44 } else {
45 put_dev_sector(sect);
46 return 0;
47 }
48}
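
The odd-looking read in ultrix_partition() is because the label ends exactly at byte 16384 of the disk: the code reads the 512-byte sector containing the label and then finds it sizeof(label) bytes back from the end of that sector. A quick standalone check of that arithmetic (the 72-byte size is what the struct above works out to on a typical 32-bit layout; treat it as an assumption):

#include <stdio.h>

int main(void)
{
	unsigned label_size = 72;	/* assumed sizeof(struct ultrix_disklabel) */
	unsigned label_end  = 16384;	/* label ends at this byte offset on disk  */

	unsigned sector = (label_end - label_size) / 512;	/* sector holding the label     */
	unsigned offset = 512 - label_size;			/* where it sits in that sector */

	printf("label is in sector %u at offset %u (bytes %u..%u of the disk)\n",
	       sector, offset, sector * 512 + offset, label_end - 1);
	return 0;
}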
diff --git a/fs/partitions/ultrix.h b/fs/partitions/ultrix.h
deleted file mode 100644
index a3cc00b2bded..000000000000
--- a/fs/partitions/ultrix.h
+++ /dev/null
@@ -1,5 +0,0 @@
1/*
2 * fs/partitions/ultrix.h
3 */
4
5int ultrix_partition(struct parsed_partitions *state);
diff --git a/fs/pipe.c b/fs/pipe.c
index 4065f07366b3..f0e485d54e64 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1290,11 +1290,4 @@ static int __init init_pipe_fs(void)
1290 return err; 1290 return err;
1291} 1291}
1292 1292
1293static void __exit exit_pipe_fs(void)
1294{
1295 kern_unmount(pipe_mnt);
1296 unregister_filesystem(&pipe_fs_type);
1297}
1298
1299fs_initcall(init_pipe_fs); 1293fs_initcall(init_pipe_fs);
1300module_exit(exit_pipe_fs);
diff --git a/fs/pnode.c b/fs/pnode.c
index d42514e32380..ab5fa9e1a79a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -13,45 +13,30 @@
13#include "pnode.h" 13#include "pnode.h"
14 14
15/* return the next shared peer mount of @p */ 15/* return the next shared peer mount of @p */
16static inline struct vfsmount *next_peer(struct vfsmount *p) 16static inline struct mount *next_peer(struct mount *p)
17{ 17{
18 return list_entry(p->mnt_share.next, struct vfsmount, mnt_share); 18 return list_entry(p->mnt_share.next, struct mount, mnt_share);
19} 19}
20 20
21static inline struct vfsmount *first_slave(struct vfsmount *p) 21static inline struct mount *first_slave(struct mount *p)
22{ 22{
23 return list_entry(p->mnt_slave_list.next, struct vfsmount, mnt_slave); 23 return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
24} 24}
25 25
26static inline struct vfsmount *next_slave(struct vfsmount *p) 26static inline struct mount *next_slave(struct mount *p)
27{ 27{
28 return list_entry(p->mnt_slave.next, struct vfsmount, mnt_slave); 28 return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
29} 29}
30 30
31/* 31static struct mount *get_peer_under_root(struct mount *mnt,
32 * Return true if path is reachable from root 32 struct mnt_namespace *ns,
33 * 33 const struct path *root)
34 * namespace_sem is held, and mnt is attached
35 */
36static bool is_path_reachable(struct vfsmount *mnt, struct dentry *dentry,
37 const struct path *root)
38{
39 while (mnt != root->mnt && mnt->mnt_parent != mnt) {
40 dentry = mnt->mnt_mountpoint;
41 mnt = mnt->mnt_parent;
42 }
43 return mnt == root->mnt && is_subdir(dentry, root->dentry);
44}
45
46static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
47 struct mnt_namespace *ns,
48 const struct path *root)
49{ 34{
50 struct vfsmount *m = mnt; 35 struct mount *m = mnt;
51 36
52 do { 37 do {
53 /* Check the namespace first for optimization */ 38 /* Check the namespace first for optimization */
54 if (m->mnt_ns == ns && is_path_reachable(m, m->mnt_root, root)) 39 if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
55 return m; 40 return m;
56 41
57 m = next_peer(m); 42 m = next_peer(m);
@@ -66,12 +51,12 @@ static struct vfsmount *get_peer_under_root(struct vfsmount *mnt,
66 * 51 *
67 * Caller must hold namespace_sem 52 * Caller must hold namespace_sem
68 */ 53 */
69int get_dominating_id(struct vfsmount *mnt, const struct path *root) 54int get_dominating_id(struct mount *mnt, const struct path *root)
70{ 55{
71 struct vfsmount *m; 56 struct mount *m;
72 57
73 for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) { 58 for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
74 struct vfsmount *d = get_peer_under_root(m, mnt->mnt_ns, root); 59 struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
75 if (d) 60 if (d)
76 return d->mnt_group_id; 61 return d->mnt_group_id;
77 } 62 }
@@ -79,10 +64,10 @@ int get_dominating_id(struct vfsmount *mnt, const struct path *root)
79 return 0; 64 return 0;
80} 65}
81 66
82static int do_make_slave(struct vfsmount *mnt) 67static int do_make_slave(struct mount *mnt)
83{ 68{
84 struct vfsmount *peer_mnt = mnt, *master = mnt->mnt_master; 69 struct mount *peer_mnt = mnt, *master = mnt->mnt_master;
85 struct vfsmount *slave_mnt; 70 struct mount *slave_mnt;
86 71
87 /* 72 /*
88 * slave 'mnt' to a peer mount that has the 73 * slave 'mnt' to a peer mount that has the
@@ -90,7 +75,7 @@ static int do_make_slave(struct vfsmount *mnt)
90 * slave it to anything that is available. 75 * slave it to anything that is available.
91 */ 76 */
92 while ((peer_mnt = next_peer(peer_mnt)) != mnt && 77 while ((peer_mnt = next_peer(peer_mnt)) != mnt &&
93 peer_mnt->mnt_root != mnt->mnt_root) ; 78 peer_mnt->mnt.mnt_root != mnt->mnt.mnt_root) ;
94 79
95 if (peer_mnt == mnt) { 80 if (peer_mnt == mnt) {
96 peer_mnt = next_peer(mnt); 81 peer_mnt = next_peer(mnt);
@@ -116,7 +101,7 @@ static int do_make_slave(struct vfsmount *mnt)
116 struct list_head *p = &mnt->mnt_slave_list; 101 struct list_head *p = &mnt->mnt_slave_list;
117 while (!list_empty(p)) { 102 while (!list_empty(p)) {
118 slave_mnt = list_first_entry(p, 103 slave_mnt = list_first_entry(p,
119 struct vfsmount, mnt_slave); 104 struct mount, mnt_slave);
120 list_del_init(&slave_mnt->mnt_slave); 105 list_del_init(&slave_mnt->mnt_slave);
121 slave_mnt->mnt_master = NULL; 106 slave_mnt->mnt_master = NULL;
122 } 107 }
@@ -129,7 +114,7 @@ static int do_make_slave(struct vfsmount *mnt)
129/* 114/*
130 * vfsmount lock must be held for write 115 * vfsmount lock must be held for write
131 */ 116 */
132void change_mnt_propagation(struct vfsmount *mnt, int type) 117void change_mnt_propagation(struct mount *mnt, int type)
133{ 118{
134 if (type == MS_SHARED) { 119 if (type == MS_SHARED) {
135 set_mnt_shared(mnt); 120 set_mnt_shared(mnt);
@@ -140,9 +125,9 @@ void change_mnt_propagation(struct vfsmount *mnt, int type)
140 list_del_init(&mnt->mnt_slave); 125 list_del_init(&mnt->mnt_slave);
141 mnt->mnt_master = NULL; 126 mnt->mnt_master = NULL;
142 if (type == MS_UNBINDABLE) 127 if (type == MS_UNBINDABLE)
143 mnt->mnt_flags |= MNT_UNBINDABLE; 128 mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
144 else 129 else
145 mnt->mnt_flags &= ~MNT_UNBINDABLE; 130 mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
146 } 131 }
147} 132}
148 133
@@ -156,20 +141,19 @@ void change_mnt_propagation(struct vfsmount *mnt, int type)
156 * vfsmount found while iterating with propagation_next() is 141 * vfsmount found while iterating with propagation_next() is
157 * a peer of one we'd found earlier. 142 * a peer of one we'd found earlier.
158 */ 143 */
159static struct vfsmount *propagation_next(struct vfsmount *m, 144static struct mount *propagation_next(struct mount *m,
160 struct vfsmount *origin) 145 struct mount *origin)
161{ 146{
162 /* are there any slaves of this mount? */ 147 /* are there any slaves of this mount? */
163 if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list)) 148 if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
164 return first_slave(m); 149 return first_slave(m);
165 150
166 while (1) { 151 while (1) {
167 struct vfsmount *next; 152 struct mount *master = m->mnt_master;
168 struct vfsmount *master = m->mnt_master;
169 153
170 if (master == origin->mnt_master) { 154 if (master == origin->mnt_master) {
171 next = next_peer(m); 155 struct mount *next = next_peer(m);
172 return ((next == origin) ? NULL : next); 156 return (next == origin) ? NULL : next;
173 } else if (m->mnt_slave.next != &master->mnt_slave_list) 157 } else if (m->mnt_slave.next != &master->mnt_slave_list)
174 return next_slave(m); 158 return next_slave(m);
175 159
@@ -187,13 +171,13 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
187 * @type return CL_SLAVE if the new mount has to be 171 * @type return CL_SLAVE if the new mount has to be
188 * cloned as a slave. 172 * cloned as a slave.
189 */ 173 */
190static struct vfsmount *get_source(struct vfsmount *dest, 174static struct mount *get_source(struct mount *dest,
191 struct vfsmount *last_dest, 175 struct mount *last_dest,
192 struct vfsmount *last_src, 176 struct mount *last_src,
193 int *type) 177 int *type)
194{ 178{
195 struct vfsmount *p_last_src = NULL; 179 struct mount *p_last_src = NULL;
196 struct vfsmount *p_last_dest = NULL; 180 struct mount *p_last_dest = NULL;
197 181
198 while (last_dest != dest->mnt_master) { 182 while (last_dest != dest->mnt_master) {
199 p_last_dest = last_dest; 183 p_last_dest = last_dest;
@@ -233,33 +217,33 @@ static struct vfsmount *get_source(struct vfsmount *dest,
233 * @source_mnt: source mount. 217 * @source_mnt: source mount.
234 * @tree_list : list of heads of trees to be attached. 218 * @tree_list : list of heads of trees to be attached.
235 */ 219 */
236int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, 220int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
237 struct vfsmount *source_mnt, struct list_head *tree_list) 221 struct mount *source_mnt, struct list_head *tree_list)
238{ 222{
239 struct vfsmount *m, *child; 223 struct mount *m, *child;
240 int ret = 0; 224 int ret = 0;
241 struct vfsmount *prev_dest_mnt = dest_mnt; 225 struct mount *prev_dest_mnt = dest_mnt;
242 struct vfsmount *prev_src_mnt = source_mnt; 226 struct mount *prev_src_mnt = source_mnt;
243 LIST_HEAD(tmp_list); 227 LIST_HEAD(tmp_list);
244 LIST_HEAD(umount_list); 228 LIST_HEAD(umount_list);
245 229
246 for (m = propagation_next(dest_mnt, dest_mnt); m; 230 for (m = propagation_next(dest_mnt, dest_mnt); m;
247 m = propagation_next(m, dest_mnt)) { 231 m = propagation_next(m, dest_mnt)) {
248 int type; 232 int type;
249 struct vfsmount *source; 233 struct mount *source;
250 234
251 if (IS_MNT_NEW(m)) 235 if (IS_MNT_NEW(m))
252 continue; 236 continue;
253 237
254 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); 238 source = get_source(m, prev_dest_mnt, prev_src_mnt, &type);
255 239
256 if (!(child = copy_tree(source, source->mnt_root, type))) { 240 if (!(child = copy_tree(source, source->mnt.mnt_root, type))) {
257 ret = -ENOMEM; 241 ret = -ENOMEM;
258 list_splice(tree_list, tmp_list.prev); 242 list_splice(tree_list, tmp_list.prev);
259 goto out; 243 goto out;
260 } 244 }
261 245
262 if (is_subdir(dest_dentry, m->mnt_root)) { 246 if (is_subdir(dest_dentry, m->mnt.mnt_root)) {
263 mnt_set_mountpoint(m, dest_dentry, child); 247 mnt_set_mountpoint(m, dest_dentry, child);
264 list_add_tail(&child->mnt_hash, tree_list); 248 list_add_tail(&child->mnt_hash, tree_list);
265 } else { 249 } else {
@@ -275,7 +259,7 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
275out: 259out:
276 br_write_lock(vfsmount_lock); 260 br_write_lock(vfsmount_lock);
277 while (!list_empty(&tmp_list)) { 261 while (!list_empty(&tmp_list)) {
278 child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); 262 child = list_first_entry(&tmp_list, struct mount, mnt_hash);
279 umount_tree(child, 0, &umount_list); 263 umount_tree(child, 0, &umount_list);
280 } 264 }
281 br_write_unlock(vfsmount_lock); 265 br_write_unlock(vfsmount_lock);
@@ -286,7 +270,7 @@ out:
286/* 270/*
287 * return true if the refcount is greater than count 271 * return true if the refcount is greater than count
288 */ 272 */
289static inline int do_refcount_check(struct vfsmount *mnt, int count) 273static inline int do_refcount_check(struct mount *mnt, int count)
290{ 274{
291 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts; 275 int mycount = mnt_get_count(mnt) - mnt->mnt_ghosts;
292 return (mycount > count); 276 return (mycount > count);
@@ -302,10 +286,10 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
302 * 286 *
303 * vfsmount lock must be held for write 287 * vfsmount lock must be held for write
304 */ 288 */
305int propagate_mount_busy(struct vfsmount *mnt, int refcnt) 289int propagate_mount_busy(struct mount *mnt, int refcnt)
306{ 290{
307 struct vfsmount *m, *child; 291 struct mount *m, *child;
308 struct vfsmount *parent = mnt->mnt_parent; 292 struct mount *parent = mnt->mnt_parent;
309 int ret = 0; 293 int ret = 0;
310 294
311 if (mnt == parent) 295 if (mnt == parent)
@@ -321,7 +305,7 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
321 305
322 for (m = propagation_next(parent, parent); m; 306 for (m = propagation_next(parent, parent); m;
323 m = propagation_next(m, parent)) { 307 m = propagation_next(m, parent)) {
324 child = __lookup_mnt(m, mnt->mnt_mountpoint, 0); 308 child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint, 0);
325 if (child && list_empty(&child->mnt_mounts) && 309 if (child && list_empty(&child->mnt_mounts) &&
326 (ret = do_refcount_check(child, 1))) 310 (ret = do_refcount_check(child, 1)))
327 break; 311 break;
@@ -333,17 +317,17 @@ int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
333 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its 317 * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
334 * parent propagates to. 318 * parent propagates to.
335 */ 319 */
336static void __propagate_umount(struct vfsmount *mnt) 320static void __propagate_umount(struct mount *mnt)
337{ 321{
338 struct vfsmount *parent = mnt->mnt_parent; 322 struct mount *parent = mnt->mnt_parent;
339 struct vfsmount *m; 323 struct mount *m;
340 324
341 BUG_ON(parent == mnt); 325 BUG_ON(parent == mnt);
342 326
343 for (m = propagation_next(parent, parent); m; 327 for (m = propagation_next(parent, parent); m;
344 m = propagation_next(m, parent)) { 328 m = propagation_next(m, parent)) {
345 329
346 struct vfsmount *child = __lookup_mnt(m, 330 struct mount *child = __lookup_mnt(&m->mnt,
347 mnt->mnt_mountpoint, 0); 331 mnt->mnt_mountpoint, 0);
348 /* 332 /*
349 * umount the child only if the child has no 333 * umount the child only if the child has no
@@ -363,7 +347,7 @@ static void __propagate_umount(struct vfsmount *mnt)
363 */ 347 */
364int propagate_umount(struct list_head *list) 348int propagate_umount(struct list_head *list)
365{ 349{
366 struct vfsmount *mnt; 350 struct mount *mnt;
367 351
368 list_for_each_entry(mnt, list, mnt_hash) 352 list_for_each_entry(mnt, list, mnt_hash)
369 __propagate_umount(mnt); 353 __propagate_umount(mnt);
diff --git a/fs/pnode.h b/fs/pnode.h
index 1ea4ae1efcd3..65c60979d541 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -9,13 +9,13 @@
9#define _LINUX_PNODE_H 9#define _LINUX_PNODE_H
10 10
11#include <linux/list.h> 11#include <linux/list.h>
12#include <linux/mount.h> 12#include "mount.h"
13 13
14#define IS_MNT_SHARED(mnt) (mnt->mnt_flags & MNT_SHARED) 14#define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
15#define IS_MNT_SLAVE(mnt) (mnt->mnt_master) 15#define IS_MNT_SLAVE(m) ((m)->mnt_master)
16#define IS_MNT_NEW(mnt) (!mnt->mnt_ns) 16#define IS_MNT_NEW(m) (!(m)->mnt_ns)
17#define CLEAR_MNT_SHARED(mnt) (mnt->mnt_flags &= ~MNT_SHARED) 17#define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
18#define IS_MNT_UNBINDABLE(mnt) (mnt->mnt_flags & MNT_UNBINDABLE) 18#define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
19 19
20#define CL_EXPIRE 0x01 20#define CL_EXPIRE 0x01
21#define CL_SLAVE 0x02 21#define CL_SLAVE 0x02
@@ -23,17 +23,25 @@
23#define CL_MAKE_SHARED 0x08 23#define CL_MAKE_SHARED 0x08
24#define CL_PRIVATE 0x10 24#define CL_PRIVATE 0x10
25 25
26static inline void set_mnt_shared(struct vfsmount *mnt) 26static inline void set_mnt_shared(struct mount *mnt)
27{ 27{
28 mnt->mnt_flags &= ~MNT_SHARED_MASK; 28 mnt->mnt.mnt_flags &= ~MNT_SHARED_MASK;
29 mnt->mnt_flags |= MNT_SHARED; 29 mnt->mnt.mnt_flags |= MNT_SHARED;
30} 30}
31 31
32void change_mnt_propagation(struct vfsmount *, int); 32void change_mnt_propagation(struct mount *, int);
33int propagate_mnt(struct vfsmount *, struct dentry *, struct vfsmount *, 33int propagate_mnt(struct mount *, struct dentry *, struct mount *,
34 struct list_head *); 34 struct list_head *);
35int propagate_umount(struct list_head *); 35int propagate_umount(struct list_head *);
36int propagate_mount_busy(struct vfsmount *, int); 36int propagate_mount_busy(struct mount *, int);
37void mnt_release_group_id(struct vfsmount *); 37void mnt_release_group_id(struct mount *);
38int get_dominating_id(struct vfsmount *mnt, const struct path *root); 38int get_dominating_id(struct mount *mnt, const struct path *root);
39unsigned int mnt_get_count(struct mount *mnt);
40void mnt_set_mountpoint(struct mount *, struct dentry *,
41 struct mount *);
42void release_mounts(struct list_head *);
43void umount_tree(struct mount *, int, struct list_head *);
44struct mount *copy_tree(struct mount *, struct dentry *, int);
45bool is_path_reachable(struct mount *, struct dentry *,
46 const struct path *root);
39#endif /* _LINUX_PNODE_H */ 47#endif /* _LINUX_PNODE_H */
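
The pnode.c and pnode.h changes above are one piece of the wider vfsmount split: propagation state moves into a private struct mount that embeds the old struct vfsmount, and the helpers now take struct mount. A minimal sketch of the container idiom, assuming the usual container_of() layout; the field list is an illustrative subset, not the authoritative fs/mount.h definition:

	#include <linux/list.h>
	#include <linux/mount.h>
	#include <linux/kernel.h>	/* container_of() */

	struct mount {				/* illustrative subset of fs/mount.h */
		struct list_head mnt_hash;
		struct mount *mnt_parent;
		struct dentry *mnt_mountpoint;
		struct vfsmount mnt;		/* the part the rest of the VFS still sees */
	};

	static inline struct mount *real_mount(struct vfsmount *mnt)
	{
		return container_of(mnt, struct mount, mnt);
	}

	/* flag helpers now go through the embedded vfsmount, as in pnode.h above */
	#define IS_MNT_SHARED(m)	((m)->mnt.mnt_flags & MNT_SHARED)

This is why the converted call sites dereference source->mnt.mnt_root or &m->mnt where they used to pass the vfsmount directly.
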
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd228d1..8c344f037bd0 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -394,8 +394,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
394 394
395 sigemptyset(&sigign); 395 sigemptyset(&sigign);
396 sigemptyset(&sigcatch); 396 sigemptyset(&sigcatch);
397 cutime = cstime = utime = stime = cputime_zero; 397 cutime = cstime = utime = stime = 0;
398 cgtime = gtime = cputime_zero; 398 cgtime = gtime = 0;
399 399
400 if (lock_task_sighand(task, &flags)) { 400 if (lock_task_sighand(task, &flags)) {
401 struct signal_struct *sig = task->signal; 401 struct signal_struct *sig = task->signal;
@@ -423,14 +423,14 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
423 do { 423 do {
424 min_flt += t->min_flt; 424 min_flt += t->min_flt;
425 maj_flt += t->maj_flt; 425 maj_flt += t->maj_flt;
426 gtime = cputime_add(gtime, t->gtime); 426 gtime += t->gtime;
427 t = next_thread(t); 427 t = next_thread(t);
428 } while (t != task); 428 } while (t != task);
429 429
430 min_flt += sig->min_flt; 430 min_flt += sig->min_flt;
431 maj_flt += sig->maj_flt; 431 maj_flt += sig->maj_flt;
432 thread_group_times(task, &utime, &stime); 432 thread_group_times(task, &utime, &stime);
433 gtime = cputime_add(gtime, sig->gtime); 433 gtime += sig->gtime;
434 } 434 }
435 435
436 sid = task_session_nr_ns(task, ns); 436 sid = task_session_nr_ns(task, ns);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 851ba3dcdc29..a1dddda999f2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -101,7 +101,7 @@
101struct pid_entry { 101struct pid_entry {
102 char *name; 102 char *name;
103 int len; 103 int len;
104 mode_t mode; 104 umode_t mode;
105 const struct inode_operations *iop; 105 const struct inode_operations *iop;
106 const struct file_operations *fop; 106 const struct file_operations *fop;
107 union proc_op op; 107 union proc_op op;
@@ -631,120 +631,6 @@ static const struct inode_operations proc_def_inode_operations = {
631 .setattr = proc_setattr, 631 .setattr = proc_setattr,
632}; 632};
633 633
634static int mounts_open_common(struct inode *inode, struct file *file,
635 const struct seq_operations *op)
636{
637 struct task_struct *task = get_proc_task(inode);
638 struct nsproxy *nsp;
639 struct mnt_namespace *ns = NULL;
640 struct path root;
641 struct proc_mounts *p;
642 int ret = -EINVAL;
643
644 if (task) {
645 rcu_read_lock();
646 nsp = task_nsproxy(task);
647 if (nsp) {
648 ns = nsp->mnt_ns;
649 if (ns)
650 get_mnt_ns(ns);
651 }
652 rcu_read_unlock();
653 if (ns && get_task_root(task, &root) == 0)
654 ret = 0;
655 put_task_struct(task);
656 }
657
658 if (!ns)
659 goto err;
660 if (ret)
661 goto err_put_ns;
662
663 ret = -ENOMEM;
664 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
665 if (!p)
666 goto err_put_path;
667
668 file->private_data = &p->m;
669 ret = seq_open(file, op);
670 if (ret)
671 goto err_free;
672
673 p->m.private = p;
674 p->ns = ns;
675 p->root = root;
676 p->m.poll_event = ns->event;
677
678 return 0;
679
680 err_free:
681 kfree(p);
682 err_put_path:
683 path_put(&root);
684 err_put_ns:
685 put_mnt_ns(ns);
686 err:
687 return ret;
688}
689
690static int mounts_release(struct inode *inode, struct file *file)
691{
692 struct proc_mounts *p = file->private_data;
693 path_put(&p->root);
694 put_mnt_ns(p->ns);
695 return seq_release(inode, file);
696}
697
698static unsigned mounts_poll(struct file *file, poll_table *wait)
699{
700 struct proc_mounts *p = file->private_data;
701 unsigned res = POLLIN | POLLRDNORM;
702
703 poll_wait(file, &p->ns->poll, wait);
704 if (mnt_had_events(p))
705 res |= POLLERR | POLLPRI;
706
707 return res;
708}
709
710static int mounts_open(struct inode *inode, struct file *file)
711{
712 return mounts_open_common(inode, file, &mounts_op);
713}
714
715static const struct file_operations proc_mounts_operations = {
716 .open = mounts_open,
717 .read = seq_read,
718 .llseek = seq_lseek,
719 .release = mounts_release,
720 .poll = mounts_poll,
721};
722
723static int mountinfo_open(struct inode *inode, struct file *file)
724{
725 return mounts_open_common(inode, file, &mountinfo_op);
726}
727
728static const struct file_operations proc_mountinfo_operations = {
729 .open = mountinfo_open,
730 .read = seq_read,
731 .llseek = seq_lseek,
732 .release = mounts_release,
733 .poll = mounts_poll,
734};
735
736static int mountstats_open(struct inode *inode, struct file *file)
737{
738 return mounts_open_common(inode, file, &mountstats_op);
739}
740
741static const struct file_operations proc_mountstats_operations = {
742 .open = mountstats_open,
743 .read = seq_read,
744 .llseek = seq_lseek,
745 .release = mounts_release,
746};
747
748#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ 634#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */
749 635
750static ssize_t proc_info_read(struct file * file, char __user * buf, 636static ssize_t proc_info_read(struct file * file, char __user * buf,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 10090d9c7ad5..2edf34f2eb61 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,7 +597,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
597 597
598static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, 598static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
599 const char *name, 599 const char *name,
600 mode_t mode, 600 umode_t mode,
601 nlink_t nlink) 601 nlink_t nlink)
602{ 602{
603 struct proc_dir_entry *ent = NULL; 603 struct proc_dir_entry *ent = NULL;
@@ -659,7 +659,7 @@ struct proc_dir_entry *proc_symlink(const char *name,
659} 659}
660EXPORT_SYMBOL(proc_symlink); 660EXPORT_SYMBOL(proc_symlink);
661 661
662struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode, 662struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
663 struct proc_dir_entry *parent) 663 struct proc_dir_entry *parent)
664{ 664{
665 struct proc_dir_entry *ent; 665 struct proc_dir_entry *ent;
@@ -699,7 +699,7 @@ struct proc_dir_entry *proc_mkdir(const char *name,
699} 699}
700EXPORT_SYMBOL(proc_mkdir); 700EXPORT_SYMBOL(proc_mkdir);
701 701
702struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, 702struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
703 struct proc_dir_entry *parent) 703 struct proc_dir_entry *parent)
704{ 704{
705 struct proc_dir_entry *ent; 705 struct proc_dir_entry *ent;
@@ -728,7 +728,7 @@ struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
728} 728}
729EXPORT_SYMBOL(create_proc_entry); 729EXPORT_SYMBOL(create_proc_entry);
730 730
731struct proc_dir_entry *proc_create_data(const char *name, mode_t mode, 731struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
732 struct proc_dir_entry *parent, 732 struct proc_dir_entry *parent,
733 const struct file_operations *proc_fops, 733 const struct file_operations *proc_fops,
734 void *data) 734 void *data)
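
The mode_t to umode_t conversion above only narrows the type procfs takes for permission bits; callers are otherwise unchanged. A hypothetical module-side user, with the entry name, show routine and module boilerplate invented for illustration:

	#include <linux/module.h>
	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int example_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "hello\n");
		return 0;
	}

	static int example_open(struct inode *inode, struct file *file)
	{
		return single_open(file, example_show, NULL);
	}

	static const struct file_operations example_fops = {
		.owner	 = THIS_MODULE,
		.open	 = example_open,
		.read	 = seq_read,
		.llseek	 = seq_lseek,
		.release = single_release,
	};

	static int __init example_init(void)
	{
		umode_t mode = S_IRUGO;		/* was a mode_t before this series */

		if (!proc_create("example", mode, NULL, &example_fops))
			return -ENOMEM;
		return 0;
	}

	static void __exit example_exit(void)
	{
		remove_proc_entry("example", NULL);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");
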
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 7737c5468a40..51a176622b8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -77,7 +77,6 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
77static void proc_i_callback(struct rcu_head *head) 77static void proc_i_callback(struct rcu_head *head)
78{ 78{
79 struct inode *inode = container_of(head, struct inode, i_rcu); 79 struct inode *inode = container_of(head, struct inode, i_rcu);
80 INIT_LIST_HEAD(&inode->i_dentry);
81 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 80 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
82} 81}
83 82
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f702acb..27da860115c6 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -9,7 +9,6 @@
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/utsname.h> 10#include <linux/utsname.h>
11#include <net/net_namespace.h> 11#include <net/net_namespace.h>
12#include <linux/mnt_namespace.h>
13#include <linux/ipc_namespace.h> 12#include <linux/ipc_namespace.h>
14#include <linux/pid_namespace.h> 13#include <linux/pid_namespace.h>
15#include "internal.h" 14#include "internal.h"
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index f738024ccc8e..06e1cc17caf6 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -179,7 +179,7 @@ const struct file_operations proc_net_operations = {
179 179
180 180
181struct proc_dir_entry *proc_net_fops_create(struct net *net, 181struct proc_dir_entry *proc_net_fops_create(struct net *net,
182 const char *name, mode_t mode, const struct file_operations *fops) 182 const char *name, umode_t mode, const struct file_operations *fops)
183{ 183{
184 return proc_create(name, mode, net->proc_net, fops); 184 return proc_create(name, mode, net->proc_net, fops);
185} 185}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9a8a2b77b874..03102d978180 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -91,20 +91,18 @@ static struct file_system_type proc_fs_type = {
91 91
92void __init proc_root_init(void) 92void __init proc_root_init(void)
93{ 93{
94 struct vfsmount *mnt;
95 int err; 94 int err;
96 95
97 proc_init_inodecache(); 96 proc_init_inodecache();
98 err = register_filesystem(&proc_fs_type); 97 err = register_filesystem(&proc_fs_type);
99 if (err) 98 if (err)
100 return; 99 return;
101 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 100 err = pid_ns_prepare_proc(&init_pid_ns);
102 if (IS_ERR(mnt)) { 101 if (err) {
103 unregister_filesystem(&proc_fs_type); 102 unregister_filesystem(&proc_fs_type);
104 return; 103 return;
105 } 104 }
106 105
107 init_pid_ns.proc_mnt = mnt;
108 proc_symlink("mounts", NULL, "self/mounts"); 106 proc_symlink("mounts", NULL, "self/mounts");
109 107
110 proc_net_init(); 108 proc_net_init();
@@ -209,5 +207,5 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
209 207
210void pid_ns_release_proc(struct pid_namespace *ns) 208void pid_ns_release_proc(struct pid_namespace *ns)
211{ 209{
212 mntput(ns->proc_mnt); 210 kern_unmount(ns->proc_mnt);
213} 211}
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 2a30d67dd6b8..d76ca6ae2b1b 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -22,31 +22,29 @@
22#define arch_idle_time(cpu) 0 22#define arch_idle_time(cpu) 0
23#endif 23#endif
24 24
25static cputime64_t get_idle_time(int cpu) 25static u64 get_idle_time(int cpu)
26{ 26{
27 u64 idle_time = get_cpu_idle_time_us(cpu, NULL); 27 u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL);
28 cputime64_t idle;
29 28
30 if (idle_time == -1ULL) { 29 if (idle_time == -1ULL) {
31 /* !NO_HZ so we can rely on cpustat.idle */ 30 /* !NO_HZ so we can rely on cpustat.idle */
32 idle = kstat_cpu(cpu).cpustat.idle; 31 idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
33 idle = cputime64_add(idle, arch_idle_time(cpu)); 32 idle += arch_idle_time(cpu);
34 } else 33 } else
35 idle = nsecs_to_jiffies64(1000 * idle_time); 34 idle = usecs_to_cputime64(idle_time);
36 35
37 return idle; 36 return idle;
38} 37}
39 38
40static cputime64_t get_iowait_time(int cpu) 39static u64 get_iowait_time(int cpu)
41{ 40{
42 u64 iowait_time = get_cpu_iowait_time_us(cpu, NULL); 41 u64 iowait, iowait_time = get_cpu_iowait_time_us(cpu, NULL);
43 cputime64_t iowait;
44 42
45 if (iowait_time == -1ULL) 43 if (iowait_time == -1ULL)
46 /* !NO_HZ so we can rely on cpustat.iowait */ 44 /* !NO_HZ so we can rely on cpustat.iowait */
47 iowait = kstat_cpu(cpu).cpustat.iowait; 45 iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
48 else 46 else
49 iowait = nsecs_to_jiffies64(1000 * iowait_time); 47 iowait = usecs_to_cputime64(iowait_time);
50 48
51 return iowait; 49 return iowait;
52} 50}
@@ -55,33 +53,30 @@ static int show_stat(struct seq_file *p, void *v)
55{ 53{
56 int i, j; 54 int i, j;
57 unsigned long jif; 55 unsigned long jif;
58 cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; 56 u64 user, nice, system, idle, iowait, irq, softirq, steal;
59 cputime64_t guest, guest_nice; 57 u64 guest, guest_nice;
60 u64 sum = 0; 58 u64 sum = 0;
61 u64 sum_softirq = 0; 59 u64 sum_softirq = 0;
62 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; 60 unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
63 struct timespec boottime; 61 struct timespec boottime;
64 62
65 user = nice = system = idle = iowait = 63 user = nice = system = idle = iowait =
66 irq = softirq = steal = cputime64_zero; 64 irq = softirq = steal = 0;
67 guest = guest_nice = cputime64_zero; 65 guest = guest_nice = 0;
68 getboottime(&boottime); 66 getboottime(&boottime);
69 jif = boottime.tv_sec; 67 jif = boottime.tv_sec;
70 68
71 for_each_possible_cpu(i) { 69 for_each_possible_cpu(i) {
72 user = cputime64_add(user, kstat_cpu(i).cpustat.user); 70 user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
73 nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); 71 nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
74 system = cputime64_add(system, kstat_cpu(i).cpustat.system); 72 system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
75 idle = cputime64_add(idle, get_idle_time(i)); 73 idle += get_idle_time(i);
76 iowait = cputime64_add(iowait, get_iowait_time(i)); 74 iowait += get_iowait_time(i);
77 irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); 75 irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
78 softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); 76 softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
79 steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); 77 steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
80 guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); 78 guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
81 guest_nice = cputime64_add(guest_nice, 79 guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
82 kstat_cpu(i).cpustat.guest_nice);
83 sum += kstat_cpu_irqs_sum(i);
84 sum += arch_irq_stat_cpu(i);
85 80
86 for (j = 0; j < NR_SOFTIRQS; j++) { 81 for (j = 0; j < NR_SOFTIRQS; j++) {
87 unsigned int softirq_stat = kstat_softirqs_cpu(j, i); 82 unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
@@ -106,16 +101,16 @@ static int show_stat(struct seq_file *p, void *v)
106 (unsigned long long)cputime64_to_clock_t(guest_nice)); 101 (unsigned long long)cputime64_to_clock_t(guest_nice));
107 for_each_online_cpu(i) { 102 for_each_online_cpu(i) {
108 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ 103 /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
109 user = kstat_cpu(i).cpustat.user; 104 user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
110 nice = kstat_cpu(i).cpustat.nice; 105 nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
111 system = kstat_cpu(i).cpustat.system; 106 system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
112 idle = get_idle_time(i); 107 idle = get_idle_time(i);
113 iowait = get_iowait_time(i); 108 iowait = get_iowait_time(i);
114 irq = kstat_cpu(i).cpustat.irq; 109 irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
115 softirq = kstat_cpu(i).cpustat.softirq; 110 softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
116 steal = kstat_cpu(i).cpustat.steal; 111 steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
117 guest = kstat_cpu(i).cpustat.guest; 112 guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
118 guest_nice = kstat_cpu(i).cpustat.guest_nice; 113 guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
119 seq_printf(p, 114 seq_printf(p,
120 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " 115 "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu "
121 "%llu\n", 116 "%llu\n",
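
The show_stat() rewrite above drops the cputime64_add()/cputime64_zero helpers in favour of plain u64 arithmetic over the per-cpu kcpustat array. The same access pattern reduced to a single counter, as an illustrative helper rather than anything from the patch:

	#include <linux/kernel_stat.h>
	#include <linux/cpumask.h>

	static u64 total_user_cputime(void)
	{
		u64 user = 0;
		int cpu;

		for_each_possible_cpu(cpu)
			user += kcpustat_cpu(cpu).cpustat[CPUTIME_USER];

		/* still in cputime units; /proc/stat converts with cputime64_to_clock_t() */
		return user;
	}
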
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d456050..9610ac772d7e 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v)
11{ 11{
12 struct timespec uptime; 12 struct timespec uptime;
13 struct timespec idle; 13 struct timespec idle;
14 u64 idletime;
15 u64 nsec;
16 u32 rem;
14 int i; 17 int i;
15 cputime_t idletime = cputime_zero;
16 18
19 idletime = 0;
17 for_each_possible_cpu(i) 20 for_each_possible_cpu(i)
18 idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle); 21 idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
19 22
20 do_posix_clock_monotonic_gettime(&uptime); 23 do_posix_clock_monotonic_gettime(&uptime);
21 monotonic_to_bootbased(&uptime); 24 monotonic_to_bootbased(&uptime);
22 cputime_to_timespec(idletime, &idle); 25 nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
26 idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
27 idle.tv_nsec = rem;
23 seq_printf(m, "%lu.%02lu %lu.%02lu\n", 28 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
24 (unsigned long) uptime.tv_sec, 29 (unsigned long) uptime.tv_sec,
25 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 30 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
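
uptime_proc_show() now does the seconds/nanoseconds split by hand instead of going through cputime_to_timespec(). The split in isolation, with an illustrative helper name:

	#include <linux/math64.h>
	#include <linux/time.h>

	static struct timespec nsec_to_timespec_sketch(u64 nsec)
	{
		struct timespec ts;
		u32 rem;

		ts.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
		ts.tv_nsec = rem;
		return ts;
	}
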
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
new file mode 100644
index 000000000000..12412852d88a
--- /dev/null
+++ b/fs/proc_namespace.c
@@ -0,0 +1,333 @@
1/*
2 * fs/proc_namespace.c - handling of /proc/<pid>/{mounts,mountinfo,mountstats}
3 *
4 * In fact, that's a piece of procfs; it's *almost* isolated from
5 * the rest of fs/proc, but has rather close relationships with
6 * fs/namespace.c, thus here instead of fs/proc
7 *
8 */
9#include <linux/mnt_namespace.h>
10#include <linux/nsproxy.h>
11#include <linux/security.h>
12#include <linux/fs_struct.h>
13#include "proc/internal.h" /* only for get_proc_task() in ->open() */
14
15#include "pnode.h"
16#include "internal.h"
17
18static unsigned mounts_poll(struct file *file, poll_table *wait)
19{
20 struct proc_mounts *p = file->private_data;
21 struct mnt_namespace *ns = p->ns;
22 unsigned res = POLLIN | POLLRDNORM;
23
24 poll_wait(file, &p->ns->poll, wait);
25
26 br_read_lock(vfsmount_lock);
27 if (p->m.poll_event != ns->event) {
28 p->m.poll_event = ns->event;
29 res |= POLLERR | POLLPRI;
30 }
31 br_read_unlock(vfsmount_lock);
32
33 return res;
34}
35
36struct proc_fs_info {
37 int flag;
38 const char *str;
39};
40
41static int show_sb_opts(struct seq_file *m, struct super_block *sb)
42{
43 static const struct proc_fs_info fs_info[] = {
44 { MS_SYNCHRONOUS, ",sync" },
45 { MS_DIRSYNC, ",dirsync" },
46 { MS_MANDLOCK, ",mand" },
47 { 0, NULL }
48 };
49 const struct proc_fs_info *fs_infop;
50
51 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
52 if (sb->s_flags & fs_infop->flag)
53 seq_puts(m, fs_infop->str);
54 }
55
56 return security_sb_show_options(m, sb);
57}
58
59static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
60{
61 static const struct proc_fs_info mnt_info[] = {
62 { MNT_NOSUID, ",nosuid" },
63 { MNT_NODEV, ",nodev" },
64 { MNT_NOEXEC, ",noexec" },
65 { MNT_NOATIME, ",noatime" },
66 { MNT_NODIRATIME, ",nodiratime" },
67 { MNT_RELATIME, ",relatime" },
68 { 0, NULL }
69 };
70 const struct proc_fs_info *fs_infop;
71
72 for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) {
73 if (mnt->mnt_flags & fs_infop->flag)
74 seq_puts(m, fs_infop->str);
75 }
76}
77
78static inline void mangle(struct seq_file *m, const char *s)
79{
80 seq_escape(m, s, " \t\n\\");
81}
82
83static void show_type(struct seq_file *m, struct super_block *sb)
84{
85 mangle(m, sb->s_type->name);
86 if (sb->s_subtype && sb->s_subtype[0]) {
87 seq_putc(m, '.');
88 mangle(m, sb->s_subtype);
89 }
90}
91
92static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
93{
94 struct mount *r = real_mount(mnt);
95 int err = 0;
96 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
97 struct super_block *sb = mnt_path.dentry->d_sb;
98
99 if (sb->s_op->show_devname) {
100 err = sb->s_op->show_devname(m, mnt_path.dentry);
101 if (err)
102 goto out;
103 } else {
104 mangle(m, r->mnt_devname ? r->mnt_devname : "none");
105 }
106 seq_putc(m, ' ');
107 seq_path(m, &mnt_path, " \t\n\\");
108 seq_putc(m, ' ');
109 show_type(m, sb);
110 seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
111 err = show_sb_opts(m, sb);
112 if (err)
113 goto out;
114 show_mnt_opts(m, mnt);
115 if (sb->s_op->show_options)
116 err = sb->s_op->show_options(m, mnt_path.dentry);
117 seq_puts(m, " 0 0\n");
118out:
119 return err;
120}
121
122static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
123{
124 struct proc_mounts *p = m->private;
125 struct mount *r = real_mount(mnt);
126 struct super_block *sb = mnt->mnt_sb;
127 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
128 struct path root = p->root;
129 int err = 0;
130
131 seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
132 MAJOR(sb->s_dev), MINOR(sb->s_dev));
133 if (sb->s_op->show_path)
134 err = sb->s_op->show_path(m, mnt->mnt_root);
135 else
136 seq_dentry(m, mnt->mnt_root, " \t\n\\");
137 if (err)
138 goto out;
139 seq_putc(m, ' ');
140
141 /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
142 err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
143 if (err)
144 goto out;
145
146 seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
147 show_mnt_opts(m, mnt);
148
149 /* Tagged fields ("foo:X" or "bar") */
150 if (IS_MNT_SHARED(r))
151 seq_printf(m, " shared:%i", r->mnt_group_id);
152 if (IS_MNT_SLAVE(r)) {
153 int master = r->mnt_master->mnt_group_id;
154 int dom = get_dominating_id(r, &p->root);
155 seq_printf(m, " master:%i", master);
156 if (dom && dom != master)
157 seq_printf(m, " propagate_from:%i", dom);
158 }
159 if (IS_MNT_UNBINDABLE(r))
160 seq_puts(m, " unbindable");
161
162 /* Filesystem specific data */
163 seq_puts(m, " - ");
164 show_type(m, sb);
165 seq_putc(m, ' ');
166 if (sb->s_op->show_devname)
167 err = sb->s_op->show_devname(m, mnt->mnt_root);
168 else
169 mangle(m, r->mnt_devname ? r->mnt_devname : "none");
170 if (err)
171 goto out;
172 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
173 err = show_sb_opts(m, sb);
174 if (err)
175 goto out;
176 if (sb->s_op->show_options)
177 err = sb->s_op->show_options(m, mnt->mnt_root);
178 seq_putc(m, '\n');
179out:
180 return err;
181}
182
183static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
184{
185 struct mount *r = real_mount(mnt);
186 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
187 struct super_block *sb = mnt_path.dentry->d_sb;
188 int err = 0;
189
190 /* device */
191 if (sb->s_op->show_devname) {
192 seq_puts(m, "device ");
193 err = sb->s_op->show_devname(m, mnt_path.dentry);
194 } else {
195 if (r->mnt_devname) {
196 seq_puts(m, "device ");
197 mangle(m, r->mnt_devname);
198 } else
199 seq_puts(m, "no device");
200 }
201
202 /* mount point */
203 seq_puts(m, " mounted on ");
204 seq_path(m, &mnt_path, " \t\n\\");
205 seq_putc(m, ' ');
206
207 /* file system type */
208 seq_puts(m, "with fstype ");
209 show_type(m, sb);
210
211 /* optional statistics */
212 if (sb->s_op->show_stats) {
213 seq_putc(m, ' ');
214 if (!err)
215 err = sb->s_op->show_stats(m, mnt_path.dentry);
216 }
217
218 seq_putc(m, '\n');
219 return err;
220}
221
222static int mounts_open_common(struct inode *inode, struct file *file,
223 int (*show)(struct seq_file *, struct vfsmount *))
224{
225 struct task_struct *task = get_proc_task(inode);
226 struct nsproxy *nsp;
227 struct mnt_namespace *ns = NULL;
228 struct path root;
229 struct proc_mounts *p;
230 int ret = -EINVAL;
231
232 if (!task)
233 goto err;
234
235 rcu_read_lock();
236 nsp = task_nsproxy(task);
237 if (!nsp) {
238 rcu_read_unlock();
239 put_task_struct(task);
240 goto err;
241 }
242 ns = nsp->mnt_ns;
243 if (!ns) {
244 rcu_read_unlock();
245 put_task_struct(task);
246 goto err;
247 }
248 get_mnt_ns(ns);
249 rcu_read_unlock();
250 task_lock(task);
251 if (!task->fs) {
252 task_unlock(task);
253 put_task_struct(task);
254 ret = -ENOENT;
255 goto err_put_ns;
256 }
257 get_fs_root(task->fs, &root);
258 task_unlock(task);
259 put_task_struct(task);
260
261 ret = -ENOMEM;
262 p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
263 if (!p)
264 goto err_put_path;
265
266 file->private_data = &p->m;
267 ret = seq_open(file, &mounts_op);
268 if (ret)
269 goto err_free;
270
271 p->m.private = p;
272 p->ns = ns;
273 p->root = root;
274 p->m.poll_event = ns->event;
275 p->show = show;
276
277 return 0;
278
279 err_free:
280 kfree(p);
281 err_put_path:
282 path_put(&root);
283 err_put_ns:
284 put_mnt_ns(ns);
285 err:
286 return ret;
287}
288
289static int mounts_release(struct inode *inode, struct file *file)
290{
291 struct proc_mounts *p = file->private_data;
292 path_put(&p->root);
293 put_mnt_ns(p->ns);
294 return seq_release(inode, file);
295}
296
297static int mounts_open(struct inode *inode, struct file *file)
298{
299 return mounts_open_common(inode, file, show_vfsmnt);
300}
301
302static int mountinfo_open(struct inode *inode, struct file *file)
303{
304 return mounts_open_common(inode, file, show_mountinfo);
305}
306
307static int mountstats_open(struct inode *inode, struct file *file)
308{
309 return mounts_open_common(inode, file, show_vfsstat);
310}
311
312const struct file_operations proc_mounts_operations = {
313 .open = mounts_open,
314 .read = seq_read,
315 .llseek = seq_lseek,
316 .release = mounts_release,
317 .poll = mounts_poll,
318};
319
320const struct file_operations proc_mountinfo_operations = {
321 .open = mountinfo_open,
322 .read = seq_read,
323 .llseek = seq_lseek,
324 .release = mounts_release,
325 .poll = mounts_poll,
326};
327
328const struct file_operations proc_mountstats_operations = {
329 .open = mountstats_open,
330 .read = seq_read,
331 .llseek = seq_lseek,
332 .release = mounts_release,
333};
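
mounts_open_common() in the new fs/proc_namespace.c relies on a seq_file property worth calling out: seq_open() reuses file->private_data if the opener has already set it, so a wrapper object can be allocated around the struct seq_file and recovered from ->show() via m->private. A stripped-down sketch of that pattern; example_state and example_seq_ops are invented names, and example_seq_ops is assumed to be defined elsewhere:

	#include <linux/seq_file.h>
	#include <linux/slab.h>

	struct example_state {
		struct seq_file m;	/* must stay first: seq_release() frees file->private_data */
		int cookie;		/* whatever per-open state ->show() needs */
	};

	static int example_open(struct inode *inode, struct file *file)
	{
		struct example_state *p;
		int ret;

		p = kmalloc(sizeof(*p), GFP_KERNEL);
		if (!p)
			return -ENOMEM;

		file->private_data = &p->m;		/* seq_open() will reuse this */
		ret = seq_open(file, &example_seq_ops);	/* assumed struct seq_operations */
		if (ret) {
			kfree(p);
			return ret;
		}

		p->m.private = p;	/* ->show() gets back to example_state via m->private */
		p->cookie = 0;
		return 0;
	}

Because the seq_file is the first member, the plain seq_release() used in the fops above frees the whole wrapper.
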
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 379a02dc1217..b3b426edb2fd 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -80,7 +80,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
80{ 80{
81 struct pstore_private *p = dentry->d_inode->i_private; 81 struct pstore_private *p = dentry->d_inode->i_private;
82 82
83 p->psi->erase(p->type, p->id, p->psi); 83 if (p->psi->erase)
84 p->psi->erase(p->type, p->id, p->psi);
84 85
85 return simple_unlink(dir, dentry); 86 return simple_unlink(dir, dentry);
86} 87}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 57bbf9078ac8..9ec22d3b4293 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -122,7 +122,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
122 memcpy(dst, s1 + s1_start, l1_cpy); 122 memcpy(dst, s1 + s1_start, l1_cpy);
123 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); 123 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
124 124
125 ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part, 125 ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
126 hsize + l1_cpy + l2_cpy, psinfo); 126 hsize + l1_cpy + l2_cpy, psinfo);
127 if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) 127 if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
128 pstore_new_entry = 1; 128 pstore_new_entry = 1;
@@ -207,8 +207,7 @@ void pstore_get_records(int quiet)
207 return; 207 return;
208 208
209 mutex_lock(&psi->read_mutex); 209 mutex_lock(&psi->read_mutex);
210 rc = psi->open(psi); 210 if (psi->open && psi->open(psi))
211 if (rc)
212 goto out; 211 goto out;
213 212
214 while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) { 213 while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
@@ -219,7 +218,8 @@ void pstore_get_records(int quiet)
219 if (rc && (rc != -EEXIST || !quiet)) 218 if (rc && (rc != -EEXIST || !quiet))
220 failed++; 219 failed++;
221 } 220 }
222 psi->close(psi); 221 if (psi->close)
222 psi->close(psi);
223out: 223out:
224 mutex_unlock(&psi->read_mutex); 224 mutex_unlock(&psi->read_mutex);
225 225
@@ -243,33 +243,5 @@ static void pstore_timefunc(unsigned long dummy)
243 mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL); 243 mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL);
244} 244}
245 245
246/*
247 * Call platform driver to write a record to the
248 * persistent store.
249 */
250int pstore_write(enum pstore_type_id type, char *buf, size_t size)
251{
252 u64 id;
253 int ret;
254 unsigned long flags;
255
256 if (!psinfo)
257 return -ENODEV;
258
259 if (size > psinfo->bufsize)
260 return -EFBIG;
261
262 spin_lock_irqsave(&psinfo->buf_lock, flags);
263 memcpy(psinfo->buf, buf, size);
264 ret = psinfo->write(type, &id, 0, size, psinfo);
265 if (ret == 0 && pstore_is_mounted())
266 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
267 size, CURRENT_TIME, psinfo);
268 spin_unlock_irqrestore(&psinfo->buf_lock, flags);
269
270 return 0;
271}
272EXPORT_SYMBOL_GPL(pstore_write);
273
274module_param(backend, charp, 0444); 246module_param(backend, charp, 0444);
275MODULE_PARM_DESC(backend, "Pstore backend to use"); 247MODULE_PARM_DESC(backend, "Pstore backend to use");
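
The pstore changes above make the backend's ->open, ->close and ->erase hooks optional, so each call site now tests the pointer first. The general shape of guarding optional ops, with illustrative types:

	struct backend_ops {
		int (*open)(void *ctx);		/* optional */
		void (*close)(void *ctx);	/* optional */
	};

	static int backend_walk_records(const struct backend_ops *ops, void *ctx)
	{
		if (ops->open && ops->open(ctx))
			return -EIO;		/* open is optional, but if present it may fail */

		/* ... iterate records here ... */

		if (ops->close)
			ops->close(ctx);
		return 0;
	}
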
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 3bdd21418432..2bfd987f4853 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -199,12 +199,13 @@ static const char *qnx4_checkroot(struct super_block *sb)
199 if (!strcmp(rootdir->di_fname, 199 if (!strcmp(rootdir->di_fname,
200 QNX4_BMNAME)) { 200 QNX4_BMNAME)) {
201 found = 1; 201 found = 1;
202 qnx4_sb(sb)->BitMap = kmalloc( sizeof( struct qnx4_inode_entry ), GFP_KERNEL ); 202 qnx4_sb(sb)->BitMap = kmemdup(rootdir,
203 sizeof(struct qnx4_inode_entry),
204 GFP_KERNEL);
203 if (!qnx4_sb(sb)->BitMap) { 205 if (!qnx4_sb(sb)->BitMap) {
204 brelse (bh); 206 brelse (bh);
205 return "not enough memory for bitmap inode"; 207 return "not enough memory for bitmap inode";
206 } 208 }/* keep bitmap inode known */
207 memcpy( qnx4_sb(sb)->BitMap, rootdir, sizeof( struct qnx4_inode_entry ) ); /* keep bitmap inode known */
208 break; 209 break;
209 } 210 }
210 } 211 }
@@ -427,7 +428,6 @@ static struct inode *qnx4_alloc_inode(struct super_block *sb)
427static void qnx4_i_callback(struct rcu_head *head) 428static void qnx4_i_callback(struct rcu_head *head)
428{ 429{
429 struct inode *inode = container_of(head, struct inode, i_rcu); 430 struct inode *inode = container_of(head, struct inode, i_rcu);
430 INIT_LIST_HEAD(&inode->i_dentry);
431 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); 431 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
432} 432}
433 433
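
The qnx4 hunk is a mechanical kmalloc()+memcpy() to kmemdup() conversion. The shape of the transformation, as a generic illustrative helper:

	#include <linux/slab.h>
	#include <linux/string.h>

	static void *dup_record(const void *src, size_t len)
	{
		/* before: p = kmalloc(len, GFP_KERNEL); if (p) memcpy(p, src, len); */
		return kmemdup(src, len, GFP_KERNEL);
	}
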
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5b572c89e6c4..5ec59b20cf76 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -73,7 +73,6 @@
73#include <linux/security.h> 73#include <linux/security.h>
74#include <linux/kmod.h> 74#include <linux/kmod.h>
75#include <linux/namei.h> 75#include <linux/namei.h>
76#include <linux/buffer_head.h>
77#include <linux/capability.h> 76#include <linux/capability.h>
78#include <linux/quotaops.h> 77#include <linux/quotaops.h>
79#include "../internal.h" /* ugh */ 78#include "../internal.h" /* ugh */
@@ -2199,7 +2198,7 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
2199 if (error) 2198 if (error)
2200 return error; 2199 return error;
2201 /* Quota file not on the same filesystem? */ 2200 /* Quota file not on the same filesystem? */
2202 if (path->mnt->mnt_sb != sb) 2201 if (path->dentry->d_sb != sb)
2203 error = -EXDEV; 2202 error = -EXDEV;
2204 else 2203 else
2205 error = vfs_load_quota_inode(path->dentry->d_inode, type, 2204 error = vfs_load_quota_inode(path->dentry->d_inode, type,
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 35f4b0ecdeb3..7898cd688a00 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -13,7 +13,6 @@
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <linux/security.h> 14#include <linux/security.h>
15#include <linux/syscalls.h> 15#include <linux/syscalls.h>
16#include <linux/buffer_head.h>
17#include <linux/capability.h> 16#include <linux/capability.h>
18#include <linux/quotaops.h> 17#include <linux/quotaops.h>
19#include <linux/types.h> 18#include <linux/types.h>
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 462ceb38fec6..aec766abe3af 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -52,7 +52,7 @@ static struct backing_dev_info ramfs_backing_dev_info = {
52}; 52};
53 53
54struct inode *ramfs_get_inode(struct super_block *sb, 54struct inode *ramfs_get_inode(struct super_block *sb,
55 const struct inode *dir, int mode, dev_t dev) 55 const struct inode *dir, umode_t mode, dev_t dev)
56{ 56{
57 struct inode * inode = new_inode(sb); 57 struct inode * inode = new_inode(sb);
58 58
@@ -92,7 +92,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
92 */ 92 */
93/* SMP-safe */ 93/* SMP-safe */
94static int 94static int
95ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 95ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
96{ 96{
97 struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev); 97 struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
98 int error = -ENOSPC; 98 int error = -ENOSPC;
@@ -106,7 +106,7 @@ ramfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
106 return error; 106 return error;
107} 107}
108 108
109static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode) 109static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
110{ 110{
111 int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0); 111 int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);
112 if (!retval) 112 if (!retval)
@@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
114 return retval; 114 return retval;
115} 115}
116 116
117static int ramfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 117static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd)
118{ 118{
119 return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); 119 return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
120} 120}
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index d1aca1df4f92..a945cd265228 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -13,6 +13,7 @@
13#include <linux/reiserfs_fs_sb.h> 13#include <linux/reiserfs_fs_sb.h>
14#include <linux/reiserfs_fs_i.h> 14#include <linux/reiserfs_fs_i.h>
15#include <linux/quotaops.h> 15#include <linux/quotaops.h>
16#include <linux/seq_file.h>
16 17
17#define PREALLOCATION_SIZE 9 18#define PREALLOCATION_SIZE 9
18 19
@@ -634,6 +635,96 @@ int reiserfs_parse_alloc_options(struct super_block *s, char *options)
634 return 0; 635 return 0;
635} 636}
636 637
638static void print_sep(struct seq_file *seq, int *first)
639{
640 if (!*first)
641 seq_puts(seq, ":");
642 else
643 *first = 0;
644}
645
646void show_alloc_options(struct seq_file *seq, struct super_block *s)
647{
648 int first = 1;
649
650 if (SB_ALLOC_OPTS(s) == ((1 << _ALLOC_skip_busy) |
651 (1 << _ALLOC_dirid_groups) | (1 << _ALLOC_packing_groups)))
652 return;
653
654 seq_puts(seq, ",alloc=");
655
656 if (TEST_OPTION(concentrating_formatted_nodes, s)) {
657 print_sep(seq, &first);
658 if (REISERFS_SB(s)->s_alloc_options.border != 10) {
659 seq_printf(seq, "concentrating_formatted_nodes=%d",
660 100 / REISERFS_SB(s)->s_alloc_options.border);
661 } else
662 seq_puts(seq, "concentrating_formatted_nodes");
663 }
664 if (TEST_OPTION(displacing_large_files, s)) {
665 print_sep(seq, &first);
666 if (REISERFS_SB(s)->s_alloc_options.large_file_size != 16) {
667 seq_printf(seq, "displacing_large_files=%lu",
668 REISERFS_SB(s)->s_alloc_options.large_file_size);
669 } else
670 seq_puts(seq, "displacing_large_files");
671 }
672 if (TEST_OPTION(displacing_new_packing_localities, s)) {
673 print_sep(seq, &first);
674 seq_puts(seq, "displacing_new_packing_localities");
675 }
676 if (TEST_OPTION(old_hashed_relocation, s)) {
677 print_sep(seq, &first);
678 seq_puts(seq, "old_hashed_relocation");
679 }
680 if (TEST_OPTION(new_hashed_relocation, s)) {
681 print_sep(seq, &first);
682 seq_puts(seq, "new_hashed_relocation");
683 }
684 if (TEST_OPTION(dirid_groups, s)) {
685 print_sep(seq, &first);
686 seq_puts(seq, "dirid_groups");
687 }
688 if (TEST_OPTION(oid_groups, s)) {
689 print_sep(seq, &first);
690 seq_puts(seq, "oid_groups");
691 }
692 if (TEST_OPTION(packing_groups, s)) {
693 print_sep(seq, &first);
694 seq_puts(seq, "packing_groups");
695 }
696 if (TEST_OPTION(hashed_formatted_nodes, s)) {
697 print_sep(seq, &first);
698 seq_puts(seq, "hashed_formatted_nodes");
699 }
700 if (TEST_OPTION(skip_busy, s)) {
701 print_sep(seq, &first);
702 seq_puts(seq, "skip_busy");
703 }
704 if (TEST_OPTION(hundredth_slices, s)) {
705 print_sep(seq, &first);
706 seq_puts(seq, "hundredth_slices");
707 }
708 if (TEST_OPTION(old_way, s)) {
709 print_sep(seq, &first);
710 seq_puts(seq, "old_way");
711 }
712 if (TEST_OPTION(displace_based_on_dirid, s)) {
713 print_sep(seq, &first);
714 seq_puts(seq, "displace_based_on_dirid");
715 }
716 if (REISERFS_SB(s)->s_alloc_options.preallocmin != 0) {
717 print_sep(seq, &first);
718 seq_printf(seq, "preallocmin=%d",
719 REISERFS_SB(s)->s_alloc_options.preallocmin);
720 }
721 if (REISERFS_SB(s)->s_alloc_options.preallocsize != 17) {
722 print_sep(seq, &first);
723 seq_printf(seq, "preallocsize=%d",
724 REISERFS_SB(s)->s_alloc_options.preallocsize);
725 }
726}
727
637static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint) 728static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
638{ 729{
639 char *hash_in; 730 char *hash_in;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 950f13af0951..9e8cd5acd79c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1766,7 +1766,7 @@ static int reiserfs_new_symlink(struct reiserfs_transaction_handle *th, struct i
1766 for the fresh inode. This can only be done outside a transaction, so 1766 for the fresh inode. This can only be done outside a transaction, so
1767 if we return non-zero, we also end the transaction. */ 1767 if we return non-zero, we also end the transaction. */
1768int reiserfs_new_inode(struct reiserfs_transaction_handle *th, 1768int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
1769 struct inode *dir, int mode, const char *symname, 1769 struct inode *dir, umode_t mode, const char *symname,
1770 /* 0 for regular, EMTRY_DIR_SIZE for dirs, 1770 /* 0 for regular, EMTRY_DIR_SIZE for dirs,
1771 strlen (symname) for symlinks) */ 1771 strlen (symname) for symlinks) */
1772 loff_t i_size, struct dentry *dentry, 1772 loff_t i_size, struct dentry *dentry,
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 4e153051bc75..950e3d1b5c9e 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -55,7 +55,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
55 break; 55 break;
56 } 56 }
57 57
58 err = mnt_want_write(filp->f_path.mnt); 58 err = mnt_want_write_file(filp);
59 if (err) 59 if (err)
60 break; 60 break;
61 61
@@ -96,7 +96,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
96 inode->i_ctime = CURRENT_TIME_SEC; 96 inode->i_ctime = CURRENT_TIME_SEC;
97 mark_inode_dirty(inode); 97 mark_inode_dirty(inode);
98setflags_out: 98setflags_out:
99 mnt_drop_write(filp->f_path.mnt); 99 mnt_drop_write_file(filp);
100 break; 100 break;
101 } 101 }
102 case REISERFS_IOC_GETVERSION: 102 case REISERFS_IOC_GETVERSION:
@@ -107,7 +107,7 @@ setflags_out:
107 err = -EPERM; 107 err = -EPERM;
108 break; 108 break;
109 } 109 }
110 err = mnt_want_write(filp->f_path.mnt); 110 err = mnt_want_write_file(filp);
111 if (err) 111 if (err)
112 break; 112 break;
113 if (get_user(inode->i_generation, (int __user *)arg)) { 113 if (get_user(inode->i_generation, (int __user *)arg)) {
@@ -117,7 +117,7 @@ setflags_out:
117 inode->i_ctime = CURRENT_TIME_SEC; 117 inode->i_ctime = CURRENT_TIME_SEC;
118 mark_inode_dirty(inode); 118 mark_inode_dirty(inode);
119setversion_out: 119setversion_out:
120 mnt_drop_write(filp->f_path.mnt); 120 mnt_drop_write_file(filp);
121 break; 121 break;
122 default: 122 default:
123 err = -ENOTTY; 123 err = -ENOTTY;
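
The ioctl hunks above switch from mnt_want_write(filp->f_path.mnt) to the file-based helpers. A minimal setter-style handler using the new pair; the inode update in the middle is a placeholder:

	#include <linux/fs.h>
	#include <linux/mount.h>

	static long example_ioctl_set(struct file *filp, unsigned long arg)
	{
		struct inode *inode = filp->f_path.dentry->d_inode;
		int err;

		err = mnt_want_write_file(filp);	/* was mnt_want_write(filp->f_path.mnt) */
		if (err)
			return err;

		/* ... update inode state under the write reference ... */
		inode->i_ctime = CURRENT_TIME_SEC;
		mark_inode_dirty(inode);

		mnt_drop_write_file(filp);		/* was mnt_drop_write(filp->f_path.mnt) */
		return 0;
	}
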
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 80058e8ce361..146378865239 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -559,7 +559,7 @@ static int drop_new_inode(struct inode *inode)
559** outside of a transaction, so we had to pull some bits of 559** outside of a transaction, so we had to pull some bits of
560** reiserfs_new_inode out into this func. 560** reiserfs_new_inode out into this func.
561*/ 561*/
562static int new_inode_init(struct inode *inode, struct inode *dir, int mode) 562static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
563{ 563{
564 /* Make inode invalid - just in case we are going to drop it before 564 /* Make inode invalid - just in case we are going to drop it before
565 * the initialization happens */ 565 * the initialization happens */
@@ -572,7 +572,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, int mode)
572 return 0; 572 return 0;
573} 573}
574 574
575static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode, 575static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
576 struct nameidata *nd) 576 struct nameidata *nd)
577{ 577{
578 int retval; 578 int retval;
@@ -643,7 +643,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
643 return retval; 643 return retval;
644} 644}
645 645
646static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode, 646static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
647 dev_t rdev) 647 dev_t rdev)
648{ 648{
649 int retval; 649 int retval;
@@ -721,7 +721,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
721 return retval; 721 return retval;
722} 722}
723 723
724static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 724static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
725{ 725{
726 int retval; 726 int retval;
727 struct inode *inode; 727 struct inode *inode;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 14363b96b6af..19c454e61b79 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,6 +28,7 @@
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/namei.h> 29#include <linux/namei.h>
30#include <linux/crc32.h> 30#include <linux/crc32.h>
31#include <linux/seq_file.h>
31 32
32struct file_system_type reiserfs_fs_type; 33struct file_system_type reiserfs_fs_type;
33 34
@@ -61,6 +62,7 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs)
61 62
62static int reiserfs_remount(struct super_block *s, int *flags, char *data); 63static int reiserfs_remount(struct super_block *s, int *flags, char *data);
63static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); 64static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
65void show_alloc_options(struct seq_file *seq, struct super_block *s);
64 66
65static int reiserfs_sync_fs(struct super_block *s, int wait) 67static int reiserfs_sync_fs(struct super_block *s, int wait)
66{ 68{
@@ -532,7 +534,6 @@ static struct inode *reiserfs_alloc_inode(struct super_block *sb)
532static void reiserfs_i_callback(struct rcu_head *head) 534static void reiserfs_i_callback(struct rcu_head *head)
533{ 535{
534 struct inode *inode = container_of(head, struct inode, i_rcu); 536 struct inode *inode = container_of(head, struct inode, i_rcu);
535 INIT_LIST_HEAD(&inode->i_dentry);
536 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 537 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
537} 538}
538 539
@@ -597,6 +598,82 @@ out:
597 reiserfs_write_unlock_once(inode->i_sb, lock_depth); 598 reiserfs_write_unlock_once(inode->i_sb, lock_depth);
598} 599}
599 600
601static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
602{
603 struct super_block *s = root->d_sb;
604 struct reiserfs_journal *journal = SB_JOURNAL(s);
605 long opts = REISERFS_SB(s)->s_mount_opt;
606
607 if (opts & (1 << REISERFS_LARGETAIL))
608 seq_puts(seq, ",tails=on");
609 else if (!(opts & (1 << REISERFS_SMALLTAIL)))
610 seq_puts(seq, ",notail");
611 /* tails=small is default so we don't show it */
612
613 if (!(opts & (1 << REISERFS_BARRIER_FLUSH)))
614 seq_puts(seq, ",barrier=none");
615 /* barrier=flush is default so we don't show it */
616
617 if (opts & (1 << REISERFS_ERROR_CONTINUE))
618 seq_puts(seq, ",errors=continue");
619 else if (opts & (1 << REISERFS_ERROR_PANIC))
620 seq_puts(seq, ",errors=panic");
621 /* errors=ro is default so we don't show it */
622
623 if (opts & (1 << REISERFS_DATA_LOG))
624 seq_puts(seq, ",data=journal");
625 else if (opts & (1 << REISERFS_DATA_WRITEBACK))
626 seq_puts(seq, ",data=writeback");
627 /* data=ordered is default so we don't show it */
628
629 if (opts & (1 << REISERFS_ATTRS))
630 seq_puts(seq, ",attrs");
631
632 if (opts & (1 << REISERFS_XATTRS_USER))
633 seq_puts(seq, ",user_xattr");
634
635 if (opts & (1 << REISERFS_EXPOSE_PRIVROOT))
636 seq_puts(seq, ",expose_privroot");
637
638 if (opts & (1 << REISERFS_POSIXACL))
639 seq_puts(seq, ",acl");
640
641 if (REISERFS_SB(s)->s_jdev)
642 seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev);
643
644 if (journal->j_max_commit_age != journal->j_default_max_commit_age)
645 seq_printf(seq, ",commit=%d", journal->j_max_commit_age);
646
647#ifdef CONFIG_QUOTA
648 if (REISERFS_SB(s)->s_qf_names[USRQUOTA])
649 seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]);
650 else if (opts & (1 << REISERFS_USRQUOTA))
651 seq_puts(seq, ",usrquota");
652 if (REISERFS_SB(s)->s_qf_names[GRPQUOTA])
653 seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
654 else if (opts & (1 << REISERFS_GRPQUOTA))
655 seq_puts(seq, ",grpquota");
656 if (REISERFS_SB(s)->s_jquota_fmt) {
657 if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_OLD)
658 seq_puts(seq, ",jqfmt=vfsold");
659 else if (REISERFS_SB(s)->s_jquota_fmt == QFMT_VFS_V0)
660 seq_puts(seq, ",jqfmt=vfsv0");
661 }
662#endif
663
664 /* Block allocator options */
665 if (opts & (1 << REISERFS_NO_BORDER))
666 seq_puts(seq, ",block-allocator=noborder");
667 if (opts & (1 << REISERFS_NO_UNHASHED_RELOCATION))
668 seq_puts(seq, ",block-allocator=no_unhashed_relocation");
669 if (opts & (1 << REISERFS_HASHED_RELOCATION))
670 seq_puts(seq, ",block-allocator=hashed_relocation");
671 if (opts & (1 << REISERFS_TEST4))
672 seq_puts(seq, ",block-allocator=test4");
673 show_alloc_options(seq, s);
674 return 0;
675}
676
600#ifdef CONFIG_QUOTA 677#ifdef CONFIG_QUOTA
601static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, 678static ssize_t reiserfs_quota_write(struct super_block *, int, const char *,
602 size_t, loff_t); 679 size_t, loff_t);
@@ -617,7 +694,7 @@ static const struct super_operations reiserfs_sops = {
617 .unfreeze_fs = reiserfs_unfreeze, 694 .unfreeze_fs = reiserfs_unfreeze,
618 .statfs = reiserfs_statfs, 695 .statfs = reiserfs_statfs,
619 .remount_fs = reiserfs_remount, 696 .remount_fs = reiserfs_remount,
620 .show_options = generic_show_options, 697 .show_options = reiserfs_show_options,
621#ifdef CONFIG_QUOTA 698#ifdef CONFIG_QUOTA
622 .quota_read = reiserfs_quota_read, 699 .quota_read = reiserfs_quota_read,
623 .quota_write = reiserfs_quota_write, 700 .quota_write = reiserfs_quota_write,
@@ -915,9 +992,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
915 {"jdev",.arg_required = 'j',.values = NULL}, 992 {"jdev",.arg_required = 'j',.values = NULL},
916 {"nolargeio",.arg_required = 'w',.values = NULL}, 993 {"nolargeio",.arg_required = 'w',.values = NULL},
917 {"commit",.arg_required = 'c',.values = NULL}, 994 {"commit",.arg_required = 'c',.values = NULL},
918 {"usrquota",.setmask = 1 << REISERFS_QUOTA}, 995 {"usrquota",.setmask = 1 << REISERFS_USRQUOTA},
919 {"grpquota",.setmask = 1 << REISERFS_QUOTA}, 996 {"grpquota",.setmask = 1 << REISERFS_GRPQUOTA},
920 {"noquota",.clrmask = 1 << REISERFS_QUOTA}, 997 {"noquota",.clrmask = 1 << REISERFS_USRQUOTA | 1 << REISERFS_GRPQUOTA},
921 {"errors",.arg_required = 'e',.values = error_actions}, 998 {"errors",.arg_required = 'e',.values = error_actions},
922 {"usrjquota",.arg_required = 999 {"usrjquota",.arg_required =
923 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL}, 1000 'u' | (1 << REISERFS_OPT_ALLOWEMPTY),.values = NULL},
@@ -1031,12 +1108,19 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1031 return 0; 1108 return 0;
1032 } 1109 }
1033 strcpy(qf_names[qtype], arg); 1110 strcpy(qf_names[qtype], arg);
1034 *mount_options |= 1 << REISERFS_QUOTA; 1111 if (qtype == USRQUOTA)
1112 *mount_options |= 1 << REISERFS_USRQUOTA;
1113 else
1114 *mount_options |= 1 << REISERFS_GRPQUOTA;
1035 } else { 1115 } else {
1036 if (qf_names[qtype] != 1116 if (qf_names[qtype] !=
1037 REISERFS_SB(s)->s_qf_names[qtype]) 1117 REISERFS_SB(s)->s_qf_names[qtype])
1038 kfree(qf_names[qtype]); 1118 kfree(qf_names[qtype]);
1039 qf_names[qtype] = NULL; 1119 qf_names[qtype] = NULL;
1120 if (qtype == USRQUOTA)
1121 *mount_options &= ~(1 << REISERFS_USRQUOTA);
1122 else
1123 *mount_options &= ~(1 << REISERFS_GRPQUOTA);
1040 } 1124 }
1041 } 1125 }
1042 if (c == 'f') { 1126 if (c == 'f') {
@@ -1075,9 +1159,10 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1075 "journaled quota format not specified."); 1159 "journaled quota format not specified.");
1076 return 0; 1160 return 0;
1077 } 1161 }
1078 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1162 if ((!(*mount_options & (1 << REISERFS_USRQUOTA)) &&
1079 if (!(*mount_options & (1 << REISERFS_QUOTA)) 1163 sb_has_quota_loaded(s, USRQUOTA)) ||
1080 && sb_any_quota_loaded(s)) { 1164 (!(*mount_options & (1 << REISERFS_GRPQUOTA)) &&
1165 sb_has_quota_loaded(s, GRPQUOTA))) {
1081 reiserfs_warning(s, "super-6516", "quota options must " 1166 reiserfs_warning(s, "super-6516", "quota options must "
1082 "be present when quota is turned on."); 1167 "be present when quota is turned on.");
1083 return 0; 1168 return 0;
@@ -1225,7 +1310,8 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1225 safe_mask |= 1 << REISERFS_ERROR_RO; 1310 safe_mask |= 1 << REISERFS_ERROR_RO;
1226 safe_mask |= 1 << REISERFS_ERROR_CONTINUE; 1311 safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
1227 safe_mask |= 1 << REISERFS_ERROR_PANIC; 1312 safe_mask |= 1 << REISERFS_ERROR_PANIC;
1228 safe_mask |= 1 << REISERFS_QUOTA; 1313 safe_mask |= 1 << REISERFS_USRQUOTA;
1314 safe_mask |= 1 << REISERFS_GRPQUOTA;
1229 1315
1230 /* Update the bitmask, taking care to keep 1316 /* Update the bitmask, taking care to keep
1231 * the bits we're not allowed to change here */ 1317 * the bits we're not allowed to change here */
@@ -1672,6 +1758,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1672 &commit_max_age, qf_names, &qfmt) == 0) { 1758 &commit_max_age, qf_names, &qfmt) == 0) {
1673 goto error; 1759 goto error;
1674 } 1760 }
1761 if (jdev_name && jdev_name[0]) {
1762 REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL);
1763 if (!REISERFS_SB(s)->s_jdev) {
1764 SWARN(silent, s, "", "Cannot allocate memory for "
1765 "journal device name");
1766 goto error;
1767 }
1768 }
1675#ifdef CONFIG_QUOTA 1769#ifdef CONFIG_QUOTA
1676 handle_quota_files(s, qf_names, &qfmt); 1770 handle_quota_files(s, qf_names, &qfmt);
1677#endif 1771#endif
@@ -2054,12 +2148,13 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2054 int err; 2148 int err;
2055 struct inode *inode; 2149 struct inode *inode;
2056 struct reiserfs_transaction_handle th; 2150 struct reiserfs_transaction_handle th;
2151 int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;
2057 2152
2058 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2153 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
2059 return -EINVAL; 2154 return -EINVAL;
2060 2155
2061 /* Quotafile not on the same filesystem? */ 2156 /* Quotafile not on the same filesystem? */
2062 if (path->mnt->mnt_sb != sb) { 2157 if (path->dentry->d_sb != sb) {
2063 err = -EXDEV; 2158 err = -EXDEV;
2064 goto out; 2159 goto out;
2065 } 2160 }
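
reiserfs_show_options() above replaces the generic_show_options catch-all: the ->show_options hook now receives the root dentry and prints only deviations from the defaults. A skeletal version for a hypothetical filesystem; the examplefs names, option bit and default value are all invented:

	#include <linux/fs.h>
	#include <linux/seq_file.h>

	#define EXAMPLEFS_OPT_DISCARD		0x01	/* hypothetical option bit */
	#define EXAMPLEFS_DEFAULT_COMMIT	5	/* hypothetical default */

	struct examplefs_sb_info {			/* hypothetical per-sb info */
		unsigned long opts;
		unsigned int commit_interval;
	};

	static int examplefs_show_options(struct seq_file *seq, struct dentry *root)
	{
		struct super_block *sb = root->d_sb;
		struct examplefs_sb_info *sbi = sb->s_fs_info;

		if (sbi->opts & EXAMPLEFS_OPT_DISCARD)
			seq_puts(seq, ",discard");
		if (sbi->commit_interval != EXAMPLEFS_DEFAULT_COMMIT)
			seq_printf(seq, ",commit=%u", sbi->commit_interval);
		/* defaults are intentionally left out, matching the reiserfs style */
		return 0;
	}

	static const struct super_operations examplefs_sops = {
		.show_options	= examplefs_show_options,
	};
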
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 6bc346c160e7..c24deda8a8bc 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -66,7 +66,7 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
66} 66}
67#endif 67#endif
68 68
69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode) 69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
70{ 70{
71 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 71 BUG_ON(!mutex_is_locked(&dir->i_mutex));
72 return dir->i_op->mkdir(dir, dentry, mode); 72 return dir->i_op->mkdir(dir, dentry, mode);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 8b4089f30408..bb36ab74eb45 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -403,7 +403,6 @@ static struct inode *romfs_alloc_inode(struct super_block *sb)
403static void romfs_i_callback(struct rcu_head *head) 403static void romfs_i_callback(struct rcu_head *head)
404{ 404{
405 struct inode *inode = container_of(head, struct inode, i_rcu); 405 struct inode *inode = container_of(head, struct inode, i_rcu);
406 INIT_LIST_HEAD(&inode->i_dentry);
407 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 406 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
408} 407}
409 408
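
This hunk, like the squashfs, sysv, ubifs, udf and ufs ones further down, drops INIT_LIST_HEAD(&inode->i_dentry) from the per-filesystem RCU free callback, presumably because the list is already empty and is (re)initialized on the allocation side, so the free path does not need to touch it. What the callback still has to do is recover the inode from the embedded rcu_head and hand it back to the slab cache, which is the standard container_of pattern; a self-contained userspace illustration follows, with invented structure names.

/*
 * Userspace illustration of the container_of() step used by the
 * *_i_callback() functions: given a pointer to an embedded member,
 * recover the enclosing structure.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct fake_rcu_head { void *next; };

struct fake_inode {
        unsigned long           ino;
        struct fake_rcu_head    i_rcu;  /* embedded, like inode->i_rcu */
};

static void i_callback(struct fake_rcu_head *head)
{
        struct fake_inode *inode = container_of(head, struct fake_inode, i_rcu);

        printf("freeing inode %lu\n", inode->ino);  /* kmem_cache_free() in the kernel */
}

int main(void)
{
        struct fake_inode inode = { .ino = 42 };

        i_callback(&inode.i_rcu);       /* what call_rcu() would invoke later */
        return 0;
}
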
diff --git a/fs/seq_file.c b/fs/seq_file.c
index dba43c3ea3af..4023d6be939b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -397,7 +397,7 @@ EXPORT_SYMBOL(seq_printf);
397 * Returns pointer past last written character in @s, or NULL in case of 397 * Returns pointer past last written character in @s, or NULL in case of
398 * failure. 398 * failure.
399 */ 399 */
400char *mangle_path(char *s, char *p, char *esc) 400char *mangle_path(char *s, const char *p, const char *esc)
401{ 401{
402 while (s <= p) { 402 while (s <= p) {
403 char c = *p++; 403 char c = *p++;
@@ -427,7 +427,7 @@ EXPORT_SYMBOL(mangle_path);
427 * return the absolute path of 'path', as represented by the 427 * return the absolute path of 'path', as represented by the
428 * dentry / mnt pair in the path parameter. 428 * dentry / mnt pair in the path parameter.
429 */ 429 */
430int seq_path(struct seq_file *m, struct path *path, char *esc) 430int seq_path(struct seq_file *m, const struct path *path, const char *esc)
431{ 431{
432 char *buf; 432 char *buf;
433 size_t size = seq_get_buf(m, &buf); 433 size_t size = seq_get_buf(m, &buf);
@@ -450,8 +450,8 @@ EXPORT_SYMBOL(seq_path);
450/* 450/*
451 * Same as seq_path, but relative to supplied root. 451 * Same as seq_path, but relative to supplied root.
452 */ 452 */
453int seq_path_root(struct seq_file *m, struct path *path, struct path *root, 453int seq_path_root(struct seq_file *m, const struct path *path,
454 char *esc) 454 const struct path *root, const char *esc)
455{ 455{
456 char *buf; 456 char *buf;
457 size_t size = seq_get_buf(m, &buf); 457 size_t size = seq_get_buf(m, &buf);
@@ -480,7 +480,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
480/* 480/*
481 * returns the path of the 'dentry' from the root of its filesystem. 481 * returns the path of the 'dentry' from the root of its filesystem.
482 */ 482 */
483int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) 483int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
484{ 484{
485 char *buf; 485 char *buf;
486 size_t size = seq_get_buf(m, &buf); 486 size_t size = seq_get_buf(m, &buf);
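
The seq_file hunks only constify parameters these helpers never write through (the escape set, the path, the root), so callers can pass string literals and const structures without casts. Here is a small userspace sketch of the same const discipline; escape_into() is an invented example helper, not a kernel function.

/*
 * Const discipline sketch: read-only inputs are const, only the output
 * buffer is writable.
 */
#include <stdio.h>
#include <string.h>

static char *escape_into(char *dst, size_t len, const char *src, const char *esc)
{
        char *d = dst;

        while (*src && (size_t)(d - dst) < len - 1) {
                if (strchr(esc, *src)) {
                        if ((size_t)(d - dst) >= len - 4)
                                break;
                        d += sprintf(d, "\\%03o", (unsigned char)*src++);
                } else {
                        *d++ = *src++;
                }
        }
        *d = '\0';
        return dst;
}

int main(void)
{
        char buf[64];

        /* String literals are fine because the read-only parameters are const. */
        puts(escape_into(buf, sizeof(buf), "a b\tc", " \t\n"));
        return 0;
}
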
diff --git a/fs/splice.c b/fs/splice.c
index fa2defa8afcf..1ec0493266b3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -25,7 +25,6 @@
25#include <linux/mm_inline.h> 25#include <linux/mm_inline.h>
26#include <linux/swap.h> 26#include <linux/swap.h>
27#include <linux/writeback.h> 27#include <linux/writeback.h>
28#include <linux/buffer_head.h>
29#include <linux/module.h> 28#include <linux/module.h>
30#include <linux/syscalls.h> 29#include <linux/syscalls.h>
31#include <linux/uio.h> 30#include <linux/uio.h>
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 2da1715452ac..d0858c2d9a47 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -464,7 +464,6 @@ static struct inode *squashfs_alloc_inode(struct super_block *sb)
464static void squashfs_i_callback(struct rcu_head *head) 464static void squashfs_i_callback(struct rcu_head *head)
465{ 465{
466 struct inode *inode = container_of(head, struct inode, i_rcu); 466 struct inode *inode = container_of(head, struct inode, i_rcu);
467 INIT_LIST_HEAD(&inode->i_dentry);
468 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode)); 467 kmem_cache_free(squashfs_inode_cachep, squashfs_i(inode));
469} 468}
470 469
diff --git a/fs/statfs.c b/fs/statfs.c
index 9cf04a118965..2aa6a22e0be2 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -7,6 +7,7 @@
7#include <linux/statfs.h> 7#include <linux/statfs.h>
8#include <linux/security.h> 8#include <linux/security.h>
9#include <linux/uaccess.h> 9#include <linux/uaccess.h>
10#include "internal.h"
10 11
11static int flags_by_mnt(int mnt_flags) 12static int flags_by_mnt(int mnt_flags)
12{ 13{
@@ -45,7 +46,7 @@ static int calculate_f_flags(struct vfsmount *mnt)
45 flags_by_sb(mnt->mnt_sb->s_flags); 46 flags_by_sb(mnt->mnt_sb->s_flags);
46} 47}
47 48
48int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf) 49static int statfs_by_dentry(struct dentry *dentry, struct kstatfs *buf)
49{ 50{
50 int retval; 51 int retval;
51 52
@@ -205,19 +206,23 @@ SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user
205 return error; 206 return error;
206} 207}
207 208
208SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) 209int vfs_ustat(dev_t dev, struct kstatfs *sbuf)
209{ 210{
210 struct super_block *s; 211 struct super_block *s = user_get_super(dev);
211 struct ustat tmp;
212 struct kstatfs sbuf;
213 int err; 212 int err;
214
215 s = user_get_super(new_decode_dev(dev));
216 if (!s) 213 if (!s)
217 return -EINVAL; 214 return -EINVAL;
218 215
219 err = statfs_by_dentry(s->s_root, &sbuf); 216 err = statfs_by_dentry(s->s_root, sbuf);
220 drop_super(s); 217 drop_super(s);
218 return err;
219}
220
221SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf)
222{
223 struct ustat tmp;
224 struct kstatfs sbuf;
225 int err = vfs_ustat(new_decode_dev(dev), &sbuf);
221 if (err) 226 if (err)
222 return err; 227 return err;
223 228
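
The statfs.c hunk splits the old ustat(2) body into a VFS-internal core, vfs_ustat(), that fills a struct kstatfs, plus a thin syscall wrapper that keeps the dev_t decoding and the conversion to the user-visible struct ustat. A userspace analogy of that split is sketched below; every name in it is invented.

/*
 * Analogy only: a core helper fills an internal structure, a thin
 * wrapper converts it for the caller.
 */
#include <stdio.h>

struct kstats { long free_blocks, free_inodes; };
struct ustats { long f_tfree, f_tinode; };       /* "user-visible" layout */

static int core_get_stats(int dev, struct kstats *out)
{
        if (dev < 0)
                return -1;                       /* like -EINVAL for an unknown device */
        out->free_blocks = 1000 - dev;           /* fake numbers */
        out->free_inodes = 500;
        return 0;
}

static int wrapper_ustat(int dev, struct ustats *ubuf)
{
        struct kstats k;
        int err = core_get_stats(dev, &k);

        if (err)
                return err;
        ubuf->f_tfree  = k.free_blocks;          /* internal -> user layout */
        ubuf->f_tinode = k.free_inodes;
        return 0;
}

int main(void)
{
        struct ustats u;

        if (!wrapper_ustat(3, &u))
                printf("tfree=%ld tinode=%ld\n", u.f_tfree, u.f_tinode);
        return 0;
}
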
diff --git a/fs/super.c b/fs/super.c
index afd0f1ad45e0..de41e1e46f09 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -136,12 +136,13 @@ static struct super_block *alloc_super(struct file_system_type *type)
136 INIT_LIST_HEAD(&s->s_files); 136 INIT_LIST_HEAD(&s->s_files);
137#endif 137#endif
138 s->s_bdi = &default_backing_dev_info; 138 s->s_bdi = &default_backing_dev_info;
139 INIT_LIST_HEAD(&s->s_instances); 139 INIT_HLIST_NODE(&s->s_instances);
140 INIT_HLIST_BL_HEAD(&s->s_anon); 140 INIT_HLIST_BL_HEAD(&s->s_anon);
141 INIT_LIST_HEAD(&s->s_inodes); 141 INIT_LIST_HEAD(&s->s_inodes);
142 INIT_LIST_HEAD(&s->s_dentry_lru); 142 INIT_LIST_HEAD(&s->s_dentry_lru);
143 INIT_LIST_HEAD(&s->s_inode_lru); 143 INIT_LIST_HEAD(&s->s_inode_lru);
144 spin_lock_init(&s->s_inode_lru_lock); 144 spin_lock_init(&s->s_inode_lru_lock);
145 INIT_LIST_HEAD(&s->s_mounts);
145 init_rwsem(&s->s_umount); 146 init_rwsem(&s->s_umount);
146 mutex_init(&s->s_lock); 147 mutex_init(&s->s_lock);
147 lockdep_set_class(&s->s_umount, &type->s_umount_key); 148 lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -200,6 +201,7 @@ static inline void destroy_super(struct super_block *s)
200 free_percpu(s->s_files); 201 free_percpu(s->s_files);
201#endif 202#endif
202 security_sb_free(s); 203 security_sb_free(s);
204 WARN_ON(!list_empty(&s->s_mounts));
203 kfree(s->s_subtype); 205 kfree(s->s_subtype);
204 kfree(s->s_options); 206 kfree(s->s_options);
205 kfree(s); 207 kfree(s);
@@ -210,7 +212,7 @@ static inline void destroy_super(struct super_block *s)
210/* 212/*
211 * Drop a superblock's refcount. The caller must hold sb_lock. 213 * Drop a superblock's refcount. The caller must hold sb_lock.
212 */ 214 */
213void __put_super(struct super_block *sb) 215static void __put_super(struct super_block *sb)
214{ 216{
215 if (!--sb->s_count) { 217 if (!--sb->s_count) {
216 list_del_init(&sb->s_list); 218 list_del_init(&sb->s_list);
@@ -225,7 +227,7 @@ void __put_super(struct super_block *sb)
225 * Drops a temporary reference, frees superblock if there's no 227 * Drops a temporary reference, frees superblock if there's no
226 * references left. 228 * references left.
227 */ 229 */
228void put_super(struct super_block *sb) 230static void put_super(struct super_block *sb)
229{ 231{
230 spin_lock(&sb_lock); 232 spin_lock(&sb_lock);
231 __put_super(sb); 233 __put_super(sb);
@@ -328,7 +330,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
328bool grab_super_passive(struct super_block *sb) 330bool grab_super_passive(struct super_block *sb)
329{ 331{
330 spin_lock(&sb_lock); 332 spin_lock(&sb_lock);
331 if (list_empty(&sb->s_instances)) { 333 if (hlist_unhashed(&sb->s_instances)) {
332 spin_unlock(&sb_lock); 334 spin_unlock(&sb_lock);
333 return false; 335 return false;
334 } 336 }
@@ -337,7 +339,7 @@ bool grab_super_passive(struct super_block *sb)
337 spin_unlock(&sb_lock); 339 spin_unlock(&sb_lock);
338 340
339 if (down_read_trylock(&sb->s_umount)) { 341 if (down_read_trylock(&sb->s_umount)) {
340 if (sb->s_root) 342 if (sb->s_root && (sb->s_flags & MS_BORN))
341 return true; 343 return true;
342 up_read(&sb->s_umount); 344 up_read(&sb->s_umount);
343 } 345 }
@@ -400,7 +402,7 @@ void generic_shutdown_super(struct super_block *sb)
400 } 402 }
401 spin_lock(&sb_lock); 403 spin_lock(&sb_lock);
402 /* should be initialized for __put_super_and_need_restart() */ 404 /* should be initialized for __put_super_and_need_restart() */
403 list_del_init(&sb->s_instances); 405 hlist_del_init(&sb->s_instances);
404 spin_unlock(&sb_lock); 406 spin_unlock(&sb_lock);
405 up_write(&sb->s_umount); 407 up_write(&sb->s_umount);
406} 408}
@@ -420,13 +422,14 @@ struct super_block *sget(struct file_system_type *type,
420 void *data) 422 void *data)
421{ 423{
422 struct super_block *s = NULL; 424 struct super_block *s = NULL;
425 struct hlist_node *node;
423 struct super_block *old; 426 struct super_block *old;
424 int err; 427 int err;
425 428
426retry: 429retry:
427 spin_lock(&sb_lock); 430 spin_lock(&sb_lock);
428 if (test) { 431 if (test) {
429 list_for_each_entry(old, &type->fs_supers, s_instances) { 432 hlist_for_each_entry(old, node, &type->fs_supers, s_instances) {
430 if (!test(old, data)) 433 if (!test(old, data))
431 continue; 434 continue;
432 if (!grab_super(old)) 435 if (!grab_super(old))
@@ -462,7 +465,7 @@ retry:
462 s->s_type = type; 465 s->s_type = type;
463 strlcpy(s->s_id, type->name, sizeof(s->s_id)); 466 strlcpy(s->s_id, type->name, sizeof(s->s_id));
464 list_add_tail(&s->s_list, &super_blocks); 467 list_add_tail(&s->s_list, &super_blocks);
465 list_add(&s->s_instances, &type->fs_supers); 468 hlist_add_head(&s->s_instances, &type->fs_supers);
466 spin_unlock(&sb_lock); 469 spin_unlock(&sb_lock);
467 get_filesystem(type); 470 get_filesystem(type);
468 register_shrinker(&s->s_shrink); 471 register_shrinker(&s->s_shrink);
@@ -497,14 +500,14 @@ void sync_supers(void)
497 500
498 spin_lock(&sb_lock); 501 spin_lock(&sb_lock);
499 list_for_each_entry(sb, &super_blocks, s_list) { 502 list_for_each_entry(sb, &super_blocks, s_list) {
500 if (list_empty(&sb->s_instances)) 503 if (hlist_unhashed(&sb->s_instances))
501 continue; 504 continue;
502 if (sb->s_op->write_super && sb->s_dirt) { 505 if (sb->s_op->write_super && sb->s_dirt) {
503 sb->s_count++; 506 sb->s_count++;
504 spin_unlock(&sb_lock); 507 spin_unlock(&sb_lock);
505 508
506 down_read(&sb->s_umount); 509 down_read(&sb->s_umount);
507 if (sb->s_root && sb->s_dirt) 510 if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN))
508 sb->s_op->write_super(sb); 511 sb->s_op->write_super(sb);
509 up_read(&sb->s_umount); 512 up_read(&sb->s_umount);
510 513
@@ -533,13 +536,13 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
533 536
534 spin_lock(&sb_lock); 537 spin_lock(&sb_lock);
535 list_for_each_entry(sb, &super_blocks, s_list) { 538 list_for_each_entry(sb, &super_blocks, s_list) {
536 if (list_empty(&sb->s_instances)) 539 if (hlist_unhashed(&sb->s_instances))
537 continue; 540 continue;
538 sb->s_count++; 541 sb->s_count++;
539 spin_unlock(&sb_lock); 542 spin_unlock(&sb_lock);
540 543
541 down_read(&sb->s_umount); 544 down_read(&sb->s_umount);
542 if (sb->s_root) 545 if (sb->s_root && (sb->s_flags & MS_BORN))
543 f(sb, arg); 546 f(sb, arg);
544 up_read(&sb->s_umount); 547 up_read(&sb->s_umount);
545 548
@@ -566,14 +569,15 @@ void iterate_supers_type(struct file_system_type *type,
566 void (*f)(struct super_block *, void *), void *arg) 569 void (*f)(struct super_block *, void *), void *arg)
567{ 570{
568 struct super_block *sb, *p = NULL; 571 struct super_block *sb, *p = NULL;
572 struct hlist_node *node;
569 573
570 spin_lock(&sb_lock); 574 spin_lock(&sb_lock);
571 list_for_each_entry(sb, &type->fs_supers, s_instances) { 575 hlist_for_each_entry(sb, node, &type->fs_supers, s_instances) {
572 sb->s_count++; 576 sb->s_count++;
573 spin_unlock(&sb_lock); 577 spin_unlock(&sb_lock);
574 578
575 down_read(&sb->s_umount); 579 down_read(&sb->s_umount);
576 if (sb->s_root) 580 if (sb->s_root && (sb->s_flags & MS_BORN))
577 f(sb, arg); 581 f(sb, arg);
578 up_read(&sb->s_umount); 582 up_read(&sb->s_umount);
579 583
@@ -607,14 +611,14 @@ struct super_block *get_super(struct block_device *bdev)
607 spin_lock(&sb_lock); 611 spin_lock(&sb_lock);
608rescan: 612rescan:
609 list_for_each_entry(sb, &super_blocks, s_list) { 613 list_for_each_entry(sb, &super_blocks, s_list) {
610 if (list_empty(&sb->s_instances)) 614 if (hlist_unhashed(&sb->s_instances))
611 continue; 615 continue;
612 if (sb->s_bdev == bdev) { 616 if (sb->s_bdev == bdev) {
613 sb->s_count++; 617 sb->s_count++;
614 spin_unlock(&sb_lock); 618 spin_unlock(&sb_lock);
615 down_read(&sb->s_umount); 619 down_read(&sb->s_umount);
616 /* still alive? */ 620 /* still alive? */
617 if (sb->s_root) 621 if (sb->s_root && (sb->s_flags & MS_BORN))
618 return sb; 622 return sb;
619 up_read(&sb->s_umount); 623 up_read(&sb->s_umount);
620 /* nope, got unmounted */ 624 /* nope, got unmounted */
@@ -647,7 +651,7 @@ struct super_block *get_active_super(struct block_device *bdev)
647restart: 651restart:
648 spin_lock(&sb_lock); 652 spin_lock(&sb_lock);
649 list_for_each_entry(sb, &super_blocks, s_list) { 653 list_for_each_entry(sb, &super_blocks, s_list) {
650 if (list_empty(&sb->s_instances)) 654 if (hlist_unhashed(&sb->s_instances))
651 continue; 655 continue;
652 if (sb->s_bdev == bdev) { 656 if (sb->s_bdev == bdev) {
653 if (grab_super(sb)) /* drops sb_lock */ 657 if (grab_super(sb)) /* drops sb_lock */
@@ -667,14 +671,14 @@ struct super_block *user_get_super(dev_t dev)
667 spin_lock(&sb_lock); 671 spin_lock(&sb_lock);
668rescan: 672rescan:
669 list_for_each_entry(sb, &super_blocks, s_list) { 673 list_for_each_entry(sb, &super_blocks, s_list) {
670 if (list_empty(&sb->s_instances)) 674 if (hlist_unhashed(&sb->s_instances))
671 continue; 675 continue;
672 if (sb->s_dev == dev) { 676 if (sb->s_dev == dev) {
673 sb->s_count++; 677 sb->s_count++;
674 spin_unlock(&sb_lock); 678 spin_unlock(&sb_lock);
675 down_read(&sb->s_umount); 679 down_read(&sb->s_umount);
676 /* still alive? */ 680 /* still alive? */
677 if (sb->s_root) 681 if (sb->s_root && (sb->s_flags & MS_BORN))
678 return sb; 682 return sb;
679 up_read(&sb->s_umount); 683 up_read(&sb->s_umount);
680 /* nope, got unmounted */ 684 /* nope, got unmounted */
@@ -719,23 +723,29 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
719 /* If we are remounting RDONLY and current sb is read/write, 723 /* If we are remounting RDONLY and current sb is read/write,
720 make sure there are no rw files opened */ 724 make sure there are no rw files opened */
721 if (remount_ro) { 725 if (remount_ro) {
722 if (force) 726 if (force) {
723 mark_files_ro(sb); 727 mark_files_ro(sb);
724 else if (!fs_may_remount_ro(sb)) 728 } else {
725 return -EBUSY; 729 retval = sb_prepare_remount_readonly(sb);
730 if (retval)
731 return retval;
732 }
726 } 733 }
727 734
728 if (sb->s_op->remount_fs) { 735 if (sb->s_op->remount_fs) {
729 retval = sb->s_op->remount_fs(sb, &flags, data); 736 retval = sb->s_op->remount_fs(sb, &flags, data);
730 if (retval) { 737 if (retval) {
731 if (!force) 738 if (!force)
732 return retval; 739 goto cancel_readonly;
733 /* If forced remount, go ahead despite any errors */ 740 /* If forced remount, go ahead despite any errors */
734 WARN(1, "forced remount of a %s fs returned %i\n", 741 WARN(1, "forced remount of a %s fs returned %i\n",
735 sb->s_type->name, retval); 742 sb->s_type->name, retval);
736 } 743 }
737 } 744 }
738 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); 745 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
746 /* Needs to be ordered wrt mnt_is_readonly() */
747 smp_wmb();
748 sb->s_readonly_remount = 0;
739 749
740 /* 750 /*
741 * Some filesystems modify their metadata via some other path than the 751 * Some filesystems modify their metadata via some other path than the
@@ -748,6 +758,10 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
748 if (remount_ro && sb->s_bdev) 758 if (remount_ro && sb->s_bdev)
749 invalidate_bdev(sb->s_bdev); 759 invalidate_bdev(sb->s_bdev);
750 return 0; 760 return 0;
761
762cancel_readonly:
763 sb->s_readonly_remount = 0;
764 return retval;
751} 765}
752 766
753static void do_emergency_remount(struct work_struct *work) 767static void do_emergency_remount(struct work_struct *work)
@@ -756,12 +770,13 @@ static void do_emergency_remount(struct work_struct *work)
756 770
757 spin_lock(&sb_lock); 771 spin_lock(&sb_lock);
758 list_for_each_entry(sb, &super_blocks, s_list) { 772 list_for_each_entry(sb, &super_blocks, s_list) {
759 if (list_empty(&sb->s_instances)) 773 if (hlist_unhashed(&sb->s_instances))
760 continue; 774 continue;
761 sb->s_count++; 775 sb->s_count++;
762 spin_unlock(&sb_lock); 776 spin_unlock(&sb_lock);
763 down_write(&sb->s_umount); 777 down_write(&sb->s_umount);
764 if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) { 778 if (sb->s_root && sb->s_bdev && (sb->s_flags & MS_BORN) &&
779 !(sb->s_flags & MS_RDONLY)) {
765 /* 780 /*
766 * What lock protects sb->s_flags?? 781 * What lock protects sb->s_flags??
767 */ 782 */
@@ -1144,6 +1159,11 @@ int freeze_super(struct super_block *sb)
1144 return -EBUSY; 1159 return -EBUSY;
1145 } 1160 }
1146 1161
1162 if (!(sb->s_flags & MS_BORN)) {
1163 up_write(&sb->s_umount);
1164 return 0; /* sic - it's "nothing to do" */
1165 }
1166
1147 if (sb->s_flags & MS_RDONLY) { 1167 if (sb->s_flags & MS_RDONLY) {
1148 sb->s_frozen = SB_FREEZE_TRANS; 1168 sb->s_frozen = SB_FREEZE_TRANS;
1149 smp_wmb(); 1169 smp_wmb();
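
The fs/super.c hunks convert s_instances from a list_head to an hlist_node hanging off type->fs_supers, so "is this superblock still on its type's list?" becomes hlist_unhashed() on the node rather than list_empty() on what was really a node; they also gate most walkers behind MS_BORN so half-set-up superblocks are skipped. The hlist trick is that each node carries a pprev pointer, which makes membership testable from the node alone. A hand-rolled, runnable userspace version of that idea (not the kernel headers) follows.

/*
 * Hand-rolled hlist: the head is a single pointer, each node knows its
 * own pprev, so membership is testable on the node itself
 * (cf. hlist_unhashed(&sb->s_instances)).
 */
#include <stdio.h>

struct hnode { struct hnode *next, **pprev; };
struct hhead { struct hnode *first; };

static void hadd(struct hhead *h, struct hnode *n)
{
        n->next = h->first;
        if (h->first)
                h->first->pprev = &n->next;
        h->first = n;
        n->pprev = &h->first;
}

static void hdel(struct hnode *n)
{
        *n->pprev = n->next;
        if (n->next)
                n->next->pprev = n->pprev;
        n->next = NULL;
        n->pprev = NULL;                /* now "unhashed" */
}

static int hunhashed(const struct hnode *n)
{
        return n->pprev == NULL;
}

struct fake_sb { int id; struct hnode instances; };

int main(void)
{
        struct hhead fs_supers = { 0 };
        struct fake_sb a = { .id = 1 }, b = { .id = 2 };

        hadd(&fs_supers, &a.instances);
        hadd(&fs_supers, &b.instances);
        hdel(&a.instances);

        printf("a unhashed=%d, b unhashed=%d\n",
               hunhashed(&a.instances), hunhashed(&b.instances));
        return 0;
}
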
diff --git a/fs/sync.c b/fs/sync.c
index 101b8ef901d7..f3501ef39235 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -14,7 +14,6 @@
14#include <linux/linkage.h> 14#include <linux/linkage.h>
15#include <linux/pagemap.h> 15#include <linux/pagemap.h>
16#include <linux/quotaops.h> 16#include <linux/quotaops.h>
17#include <linux/buffer_head.h>
18#include <linux/backing-dev.h> 17#include <linux/backing-dev.h>
19#include "internal.h" 18#include "internal.h"
20 19
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index d4e6080b4b20..62f4fb37789e 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -518,7 +518,7 @@ out:
518} 518}
519 519
520int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, 520int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
521 const struct attribute *attr, int type, mode_t amode) 521 const struct attribute *attr, int type, umode_t amode)
522{ 522{
523 umode_t mode = (amode & S_IALLUGO) | S_IFREG; 523 umode_t mode = (amode & S_IALLUGO) | S_IFREG;
524 struct sysfs_addrm_cxt acxt; 524 struct sysfs_addrm_cxt acxt;
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
618 * 618 *
619 */ 619 */
620int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, 620int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
621 mode_t mode) 621 umode_t mode)
622{ 622{
623 struct sysfs_dirent *sd; 623 struct sysfs_dirent *sd;
624 struct iattr newattrs; 624 struct iattr newattrs;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 194414f8298c..dd1701caecc9 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -33,7 +33,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
33 int error = 0, i; 33 int error = 0, i;
34 34
35 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { 35 for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
36 mode_t mode = 0; 36 umode_t mode = 0;
37 37
38 /* in update mode, we're changing the permissions or 38 /* in update mode, we're changing the permissions or
39 * visibility. Do this by first removing then 39 * visibility. Do this by first removing then
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index c81b22f3ace1..4a802b4a9056 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -187,7 +187,7 @@ out:
187 return error; 187 return error;
188} 188}
189 189
190static inline void set_default_inode_attr(struct inode * inode, mode_t mode) 190static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
191{ 191{
192 inode->i_mode = mode; 192 inode->i_mode = mode;
193 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 193 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce29e28b766d..7484a36ee678 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -79,7 +79,7 @@ struct sysfs_dirent {
79 }; 79 };
80 80
81 unsigned int s_flags; 81 unsigned int s_flags;
82 unsigned short s_mode; 82 umode_t s_mode;
83 ino_t s_ino; 83 ino_t s_ino;
84 struct sysfs_inode_attrs *s_iattr; 84 struct sysfs_inode_attrs *s_iattr;
85}; 85};
@@ -229,7 +229,7 @@ int sysfs_add_file(struct sysfs_dirent *dir_sd,
229 const struct attribute *attr, int type); 229 const struct attribute *attr, int type);
230 230
231int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, 231int sysfs_add_file_mode(struct sysfs_dirent *dir_sd,
232 const struct attribute *attr, int type, mode_t amode); 232 const struct attribute *attr, int type, umode_t amode);
233/* 233/*
234 * bin.c 234 * bin.c
235 */ 235 */
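
These sysfs hunks, like the sysv, ubifs, udf and ufs ones that follow, move mode arguments and fields from mode_t (or plain int / unsigned short) to umode_t, a 16-bit type in the kernel; the matching printf-style conversions then take an h length modifier, as the "%#hx" and "%ho" changes below show. A short userspace demonstration of the width and the format:

/*
 * 16-bit mode type and the %ho / %#hx conversions used after the
 * switch to umode_t.
 */
#include <stdio.h>
#include <sys/stat.h>

typedef unsigned short my_umode_t;      /* stand-in for the kernel's umode_t */

int main(void)
{
        my_umode_t mode = S_IFREG | 0644;

        /* The short is promoted to int in the call; printf narrows it back. */
        printf("octal %#ho, hex %#hx, size %zu bytes\n",
               mode, mode, sizeof(mode));
        return 0;
}
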
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index 0c96c98bd1db..8233b02eccae 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -132,7 +132,7 @@ void sysv_free_inode(struct inode * inode)
132 brelse(bh); 132 brelse(bh);
133} 133}
134 134
135struct inode * sysv_new_inode(const struct inode * dir, mode_t mode) 135struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
136{ 136{
137 struct super_block *sb = dir->i_sb; 137 struct super_block *sb = dir->i_sb;
138 struct sysv_sb_info *sbi = SYSV_SB(sb); 138 struct sysv_sb_info *sbi = SYSV_SB(sb);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 25ffb3e9a3f8..3da5ce25faf0 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -336,7 +336,6 @@ static struct inode *sysv_alloc_inode(struct super_block *sb)
336static void sysv_i_callback(struct rcu_head *head) 336static void sysv_i_callback(struct rcu_head *head)
337{ 337{
338 struct inode *inode = container_of(head, struct inode, i_rcu); 338 struct inode *inode = container_of(head, struct inode, i_rcu);
339 INIT_LIST_HEAD(&inode->i_dentry);
340 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); 339 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
341} 340}
342 341
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index fa8d43c92bb8..90b54b438789 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -442,7 +442,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
442 442
443int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 443int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
444{ 444{
445 struct super_block *s = mnt->mnt_sb; 445 struct super_block *s = dentry->d_sb;
446 generic_fillattr(dentry->d_inode, stat); 446 generic_fillattr(dentry->d_inode, stat);
447 stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); 447 stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
448 stat->blksize = s->s_blocksize; 448 stat->blksize = s->s_blocksize;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index e474fbcf8bde..b217797e621b 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -61,7 +61,7 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, st
61 return NULL; 61 return NULL;
62} 62}
63 63
64static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_t rdev) 64static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
65{ 65{
66 struct inode * inode; 66 struct inode * inode;
67 int err; 67 int err;
@@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, int mode, dev_
80 return err; 80 return err;
81} 81}
82 82
83static int sysv_create(struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) 83static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd)
84{ 84{
85 return sysv_mknod(dir, dentry, mode, 0); 85 return sysv_mknod(dir, dentry, mode, 0);
86} 86}
@@ -131,7 +131,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
131 return add_nondir(dentry, inode); 131 return add_nondir(dentry, inode);
132} 132}
133 133
134static int sysv_mkdir(struct inode * dir, struct dentry *dentry, int mode) 134static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
135{ 135{
136 struct inode * inode; 136 struct inode * inode;
137 int err = -EMLINK; 137 int err = -EMLINK;
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index bb55cdb394bf..0e4b821c5691 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -125,7 +125,7 @@ static inline void dirty_sb(struct super_block *sb)
125/* ialloc.c */ 125/* ialloc.c */
126extern struct sysv_inode *sysv_raw_inode(struct super_block *, unsigned, 126extern struct sysv_inode *sysv_raw_inode(struct super_block *, unsigned,
127 struct buffer_head **); 127 struct buffer_head **);
128extern struct inode * sysv_new_inode(const struct inode *, mode_t); 128extern struct inode * sysv_new_inode(const struct inode *, umode_t);
129extern void sysv_free_inode(struct inode *); 129extern void sysv_free_inode(struct inode *);
130extern unsigned long sysv_count_free_inodes(struct super_block *); 130extern unsigned long sysv_count_free_inodes(struct super_block *);
131 131
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 683492043317..d6fe1c79f18b 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -56,7 +56,7 @@
56 * 56 *
57 * This function returns the inherited flags. 57 * This function returns the inherited flags.
58 */ 58 */
59static int inherit_flags(const struct inode *dir, int mode) 59static int inherit_flags(const struct inode *dir, umode_t mode)
60{ 60{
61 int flags; 61 int flags;
62 const struct ubifs_inode *ui = ubifs_inode(dir); 62 const struct ubifs_inode *ui = ubifs_inode(dir);
@@ -86,7 +86,7 @@ static int inherit_flags(const struct inode *dir, int mode)
86 * case of failure. 86 * case of failure.
87 */ 87 */
88struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, 88struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
89 int mode) 89 umode_t mode)
90{ 90{
91 struct inode *inode; 91 struct inode *inode;
92 struct ubifs_inode *ui; 92 struct ubifs_inode *ui;
@@ -253,7 +253,7 @@ out:
253 return ERR_PTR(err); 253 return ERR_PTR(err);
254} 254}
255 255
256static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode, 256static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
257 struct nameidata *nd) 257 struct nameidata *nd)
258{ 258{
259 struct inode *inode; 259 struct inode *inode;
@@ -268,7 +268,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
268 * parent directory inode. 268 * parent directory inode.
269 */ 269 */
270 270
271 dbg_gen("dent '%.*s', mode %#x in dir ino %lu", 271 dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
272 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); 272 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
273 273
274 err = ubifs_budget_space(c, &req); 274 err = ubifs_budget_space(c, &req);
@@ -712,7 +712,7 @@ out_cancel:
712 return err; 712 return err;
713} 713}
714 714
715static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 715static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
716{ 716{
717 struct inode *inode; 717 struct inode *inode;
718 struct ubifs_inode *dir_ui = ubifs_inode(dir); 718 struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -725,7 +725,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
725 * directory inode. 725 * directory inode.
726 */ 726 */
727 727
728 dbg_gen("dent '%.*s', mode %#x in dir ino %lu", 728 dbg_gen("dent '%.*s', mode %#hx in dir ino %lu",
729 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino); 729 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
730 730
731 err = ubifs_budget_space(c, &req); 731 err = ubifs_budget_space(c, &req);
@@ -769,7 +769,7 @@ out_budg:
769} 769}
770 770
771static int ubifs_mknod(struct inode *dir, struct dentry *dentry, 771static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
772 int mode, dev_t rdev) 772 umode_t mode, dev_t rdev)
773{ 773{
774 struct inode *inode; 774 struct inode *inode;
775 struct ubifs_inode *ui; 775 struct ubifs_inode *ui;
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 548acf494afd..1a7e2d8bdbe9 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -173,12 +173,12 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
173 * Make sure the file-system is read-write and make sure it 173 * Make sure the file-system is read-write and make sure it
174 * will not become read-only while we are changing the flags. 174 * will not become read-only while we are changing the flags.
175 */ 175 */
176 err = mnt_want_write(file->f_path.mnt); 176 err = mnt_want_write_file(file);
177 if (err) 177 if (err)
178 return err; 178 return err;
179 dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags); 179 dbg_gen("set flags: %#x, i_flags %#x", flags, inode->i_flags);
180 err = setflags(inode, flags); 180 err = setflags(inode, flags);
181 mnt_drop_write(file->f_path.mnt); 181 mnt_drop_write_file(file);
182 return err; 182 return err;
183 } 183 }
184 184
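
This hunk, and the fs/xattr.c ones further down, switch from mnt_want_write(file->f_path.mnt) / mnt_drop_write(...) to the file-based helpers. A minimal kernel-style sketch of the resulting shape is shown below; it is not buildable on its own, and the actual modification step is left as a comment rather than a real call.

/*
 * Kernel-style sketch: pin the mount writable around a change, keyed
 * off the struct file.
 */
#include <linux/fs.h>
#include <linux/mount.h>

static long example_setflags(struct file *file, unsigned int flags)
{
        int err;

        err = mnt_want_write_file(file);        /* fails on a read-only mount */
        if (err)
                return err;

        /* ... modify the inode / its flags here ... */

        mnt_drop_write_file(file);              /* always paired with the want */
        return 0;
}
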
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 20403dc5d437..63765d58445b 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -276,7 +276,6 @@ static void ubifs_i_callback(struct rcu_head *head)
276{ 276{
277 struct inode *inode = container_of(head, struct inode, i_rcu); 277 struct inode *inode = container_of(head, struct inode, i_rcu);
278 struct ubifs_inode *ui = ubifs_inode(inode); 278 struct ubifs_inode *ui = ubifs_inode(inode);
279 INIT_LIST_HEAD(&inode->i_dentry);
280 kmem_cache_free(ubifs_inode_slab, ui); 279 kmem_cache_free(ubifs_inode_slab, ui);
281} 280}
282 281
@@ -420,9 +419,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
420 return 0; 419 return 0;
421} 420}
422 421
423static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) 422static int ubifs_show_options(struct seq_file *s, struct dentry *root)
424{ 423{
425 struct ubifs_info *c = mnt->mnt_sb->s_fs_info; 424 struct ubifs_info *c = root->d_sb->s_fs_info;
426 425
427 if (c->mount_opts.unmount_mode == 2) 426 if (c->mount_opts.unmount_mode == 2)
428 seq_printf(s, ",fast_unmount"); 427 seq_printf(s, ",fast_unmount");
@@ -2264,19 +2263,12 @@ static int __init ubifs_init(void)
2264 return -EINVAL; 2263 return -EINVAL;
2265 } 2264 }
2266 2265
2267 err = register_filesystem(&ubifs_fs_type);
2268 if (err) {
2269 ubifs_err("cannot register file system, error %d", err);
2270 return err;
2271 }
2272
2273 err = -ENOMEM;
2274 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab", 2266 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
2275 sizeof(struct ubifs_inode), 0, 2267 sizeof(struct ubifs_inode), 0,
2276 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT, 2268 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
2277 &inode_slab_ctor); 2269 &inode_slab_ctor);
2278 if (!ubifs_inode_slab) 2270 if (!ubifs_inode_slab)
2279 goto out_reg; 2271 return -ENOMEM;
2280 2272
2281 register_shrinker(&ubifs_shrinker_info); 2273 register_shrinker(&ubifs_shrinker_info);
2282 2274
@@ -2288,15 +2280,20 @@ static int __init ubifs_init(void)
2288 if (err) 2280 if (err)
2289 goto out_compr; 2281 goto out_compr;
2290 2282
2283 err = register_filesystem(&ubifs_fs_type);
2284 if (err) {
2285 ubifs_err("cannot register file system, error %d", err);
2286 goto out_dbg;
2287 }
2291 return 0; 2288 return 0;
2292 2289
2290out_dbg:
2291 dbg_debugfs_exit();
2293out_compr: 2292out_compr:
2294 ubifs_compressors_exit(); 2293 ubifs_compressors_exit();
2295out_shrinker: 2294out_shrinker:
2296 unregister_shrinker(&ubifs_shrinker_info); 2295 unregister_shrinker(&ubifs_shrinker_info);
2297 kmem_cache_destroy(ubifs_inode_slab); 2296 kmem_cache_destroy(ubifs_inode_slab);
2298out_reg:
2299 unregister_filesystem(&ubifs_fs_type);
2300 return err; 2297 return err;
2301} 2298}
2302/* late_initcall to let compressors initialize first */ 2299/* late_initcall to let compressors initialize first */
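
The ubifs_init() reordering above registers the filesystem type only after the inode slab, shrinker, compressors and debugfs pieces are ready, and the error path unwinds in exactly the reverse order, so a mount can never race against a half-initialized module. A runnable userspace sketch of that init/rollback shape follows; the step names are invented and the "register" step is forced to fail to exercise the unwind.

/*
 * "Make the module visible last, unwind in reverse order" sketch.
 */
#include <stdio.h>

static int  step_cache(void)      { puts("cache up");   return 0; }
static void step_cache_undo(void) { puts("cache down"); }
static int  step_debug(void)      { puts("debug up");   return 0; }
static void step_debug_undo(void) { puts("debug down"); }
static int  step_register(void)   { puts("registered"); return -1; /* simulate failure */ }

static int module_init_example(void)
{
        int err;

        err = step_cache();
        if (err)
                return err;
        err = step_debug();
        if (err)
                goto out_cache;
        err = step_register();          /* externally visible: do it last */
        if (err)
                goto out_debug;
        return 0;

out_debug:
        step_debug_undo();
out_cache:
        step_cache_undo();
        return err;
}

int main(void)
{
        printf("init -> %d\n", module_init_example());
        return 0;
}
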
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 27f22551f805..12e94774aa88 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1734,7 +1734,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
1734 1734
1735/* dir.c */ 1735/* dir.c */
1736struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, 1736struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
1737 int mode); 1737 umode_t mode);
1738int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, 1738int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1739 struct kstat *stat); 1739 struct kstat *stat);
1740 1740
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 6fb7e0adcda0..05ab48195be9 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -46,7 +46,7 @@ void udf_free_inode(struct inode *inode)
46 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); 46 udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1);
47} 47}
48 48
49struct inode *udf_new_inode(struct inode *dir, int mode, int *err) 49struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err)
50{ 50{
51 struct super_block *sb = dir->i_sb; 51 struct super_block *sb = dir->i_sb;
52 struct udf_sb_info *sbi = UDF_SB(sb); 52 struct udf_sb_info *sbi = UDF_SB(sb);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 4fd1d809738c..4598904be1bb 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -48,7 +48,7 @@ MODULE_LICENSE("GPL");
48 48
49#define EXTENT_MERGE_SIZE 5 49#define EXTENT_MERGE_SIZE 5
50 50
51static mode_t udf_convert_permissions(struct fileEntry *); 51static umode_t udf_convert_permissions(struct fileEntry *);
52static int udf_update_inode(struct inode *, int); 52static int udf_update_inode(struct inode *, int);
53static void udf_fill_inode(struct inode *, struct buffer_head *); 53static void udf_fill_inode(struct inode *, struct buffer_head *);
54static int udf_sync_inode(struct inode *inode); 54static int udf_sync_inode(struct inode *inode);
@@ -1452,9 +1452,9 @@ static int udf_alloc_i_data(struct inode *inode, size_t size)
1452 return 0; 1452 return 0;
1453} 1453}
1454 1454
1455static mode_t udf_convert_permissions(struct fileEntry *fe) 1455static umode_t udf_convert_permissions(struct fileEntry *fe)
1456{ 1456{
1457 mode_t mode; 1457 umode_t mode;
1458 uint32_t permissions; 1458 uint32_t permissions;
1459 uint32_t flags; 1459 uint32_t flags;
1460 1460
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 4639e137222f..08bf46edf9c4 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -552,7 +552,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
552 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); 552 return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL);
553} 553}
554 554
555static int udf_create(struct inode *dir, struct dentry *dentry, int mode, 555static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
556 struct nameidata *nd) 556 struct nameidata *nd)
557{ 557{
558 struct udf_fileident_bh fibh; 558 struct udf_fileident_bh fibh;
@@ -596,7 +596,7 @@ static int udf_create(struct inode *dir, struct dentry *dentry, int mode,
596 return 0; 596 return 0;
597} 597}
598 598
599static int udf_mknod(struct inode *dir, struct dentry *dentry, int mode, 599static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
600 dev_t rdev) 600 dev_t rdev)
601{ 601{
602 struct inode *inode; 602 struct inode *inode;
@@ -640,7 +640,7 @@ out:
640 return err; 640 return err;
641} 641}
642 642
643static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode) 643static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
644{ 644{
645 struct inode *inode; 645 struct inode *inode;
646 struct udf_fileident_bh fibh; 646 struct udf_fileident_bh fibh;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index e185253470df..0c33225647a0 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -89,7 +89,7 @@ static void udf_open_lvid(struct super_block *);
89static void udf_close_lvid(struct super_block *); 89static void udf_close_lvid(struct super_block *);
90static unsigned int udf_count_free(struct super_block *); 90static unsigned int udf_count_free(struct super_block *);
91static int udf_statfs(struct dentry *, struct kstatfs *); 91static int udf_statfs(struct dentry *, struct kstatfs *);
92static int udf_show_options(struct seq_file *, struct vfsmount *); 92static int udf_show_options(struct seq_file *, struct dentry *);
93 93
94struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) 94struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
95{ 95{
@@ -138,7 +138,6 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
138static void udf_i_callback(struct rcu_head *head) 138static void udf_i_callback(struct rcu_head *head)
139{ 139{
140 struct inode *inode = container_of(head, struct inode, i_rcu); 140 struct inode *inode = container_of(head, struct inode, i_rcu);
141 INIT_LIST_HEAD(&inode->i_dentry);
142 kmem_cache_free(udf_inode_cachep, UDF_I(inode)); 141 kmem_cache_free(udf_inode_cachep, UDF_I(inode));
143} 142}
144 143
@@ -196,11 +195,11 @@ struct udf_options {
196 unsigned int fileset; 195 unsigned int fileset;
197 unsigned int rootdir; 196 unsigned int rootdir;
198 unsigned int flags; 197 unsigned int flags;
199 mode_t umask; 198 umode_t umask;
200 gid_t gid; 199 gid_t gid;
201 uid_t uid; 200 uid_t uid;
202 mode_t fmode; 201 umode_t fmode;
203 mode_t dmode; 202 umode_t dmode;
204 struct nls_table *nls_map; 203 struct nls_table *nls_map;
205}; 204};
206 205
@@ -250,9 +249,9 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
250 return 0; 249 return 0;
251} 250}
252 251
253static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt) 252static int udf_show_options(struct seq_file *seq, struct dentry *root)
254{ 253{
255 struct super_block *sb = mnt->mnt_sb; 254 struct super_block *sb = root->d_sb;
256 struct udf_sb_info *sbi = UDF_SB(sb); 255 struct udf_sb_info *sbi = UDF_SB(sb);
257 256
258 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT)) 257 if (!UDF_QUERY_FLAG(sb, UDF_FLAG_STRICT))
@@ -280,11 +279,11 @@ static int udf_show_options(struct seq_file *seq, struct vfsmount *mnt)
280 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET)) 279 if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
281 seq_printf(seq, ",gid=%u", sbi->s_gid); 280 seq_printf(seq, ",gid=%u", sbi->s_gid);
282 if (sbi->s_umask != 0) 281 if (sbi->s_umask != 0)
283 seq_printf(seq, ",umask=%o", sbi->s_umask); 282 seq_printf(seq, ",umask=%ho", sbi->s_umask);
284 if (sbi->s_fmode != UDF_INVALID_MODE) 283 if (sbi->s_fmode != UDF_INVALID_MODE)
285 seq_printf(seq, ",mode=%o", sbi->s_fmode); 284 seq_printf(seq, ",mode=%ho", sbi->s_fmode);
286 if (sbi->s_dmode != UDF_INVALID_MODE) 285 if (sbi->s_dmode != UDF_INVALID_MODE)
287 seq_printf(seq, ",dmode=%o", sbi->s_dmode); 286 seq_printf(seq, ",dmode=%ho", sbi->s_dmode);
288 if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET)) 287 if (UDF_QUERY_FLAG(sb, UDF_FLAG_SESSION_SET))
289 seq_printf(seq, ",session=%u", sbi->s_session); 288 seq_printf(seq, ",session=%u", sbi->s_session);
290 if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET)) 289 if (UDF_QUERY_FLAG(sb, UDF_FLAG_LASTBLOCK_SET))
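
Both ubifs_show_options() and udf_show_options() above are adjusted for the new ->show_options prototype, which receives the filesystem's root dentry instead of a vfsmount; the superblock is reached via root->d_sb. A hedged kernel-style sketch of a minimal implementation under that prototype follows; it is not buildable on its own, and the private info structure and option names are invented.

/*
 * Kernel-style sketch of a ->show_options under the dentry-based
 * prototype.  examplefs_sb_info and its options are made up.
 */
#include <linux/fs.h>
#include <linux/seq_file.h>

struct examplefs_sb_info { int quiet; unsigned int umask; };

static int examplefs_show_options(struct seq_file *seq, struct dentry *root)
{
        struct examplefs_sb_info *sbi = root->d_sb->s_fs_info;

        if (sbi->quiet)
                seq_puts(seq, ",quiet");
        if (sbi->umask)
                seq_printf(seq, ",umask=%o", sbi->umask);
        return 0;
}
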
diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h
index 5142a82e3276..42ad69ac9576 100644
--- a/fs/udf/udf_sb.h
+++ b/fs/udf/udf_sb.h
@@ -50,7 +50,7 @@
50#define UDF_SPARABLE_MAP15 0x1522U 50#define UDF_SPARABLE_MAP15 0x1522U
51#define UDF_METADATA_MAP25 0x2511U 51#define UDF_METADATA_MAP25 0x2511U
52 52
53#define UDF_INVALID_MODE ((mode_t)-1) 53#define UDF_INVALID_MODE ((umode_t)-1)
54 54
55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */ 55#pragma pack(1) /* XXX(hch): Why? This file just defines in-core structures */
56 56
@@ -127,11 +127,11 @@ struct udf_sb_info {
127 struct buffer_head *s_lvid_bh; 127 struct buffer_head *s_lvid_bh;
128 128
129 /* Default permissions */ 129 /* Default permissions */
130 mode_t s_umask; 130 umode_t s_umask;
131 gid_t s_gid; 131 gid_t s_gid;
132 uid_t s_uid; 132 uid_t s_uid;
133 mode_t s_fmode; 133 umode_t s_fmode;
134 mode_t s_dmode; 134 umode_t s_dmode;
135 /* Lock protecting consistency of above permission settings */ 135 /* Lock protecting consistency of above permission settings */
136 rwlock_t s_cred_lock; 136 rwlock_t s_cred_lock;
137 137
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index f34e6fc0cdaa..ebe10314e512 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -215,7 +215,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *);
215 215
216/* ialloc.c */ 216/* ialloc.c */
217extern void udf_free_inode(struct inode *); 217extern void udf_free_inode(struct inode *);
218extern struct inode *udf_new_inode(struct inode *, int, int *); 218extern struct inode *udf_new_inode(struct inode *, umode_t, int *);
219 219
220/* truncate.c */ 220/* truncate.c */
221extern void udf_truncate_tail_extent(struct inode *); 221extern void udf_truncate_tail_extent(struct inode *);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 78a4c70d46b5..4ec5c1085a87 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -170,7 +170,7 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
170 * For other inodes, search forward from the parent directory's block 170 * For other inodes, search forward from the parent directory's block
171 * group to find a free inode. 171 * group to find a free inode.
172 */ 172 */
173struct inode * ufs_new_inode(struct inode * dir, int mode) 173struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
174{ 174{
175 struct super_block * sb; 175 struct super_block * sb;
176 struct ufs_sb_info * sbi; 176 struct ufs_sb_info * sbi;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 879b13436fa4..9094e1d917be 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -583,7 +583,7 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
583{ 583{
584 struct ufs_inode_info *ufsi = UFS_I(inode); 584 struct ufs_inode_info *ufsi = UFS_I(inode);
585 struct super_block *sb = inode->i_sb; 585 struct super_block *sb = inode->i_sb;
586 mode_t mode; 586 umode_t mode;
587 587
588 /* 588 /*
589 * Copy data to the in-core inode. 589 * Copy data to the in-core inode.
@@ -630,7 +630,7 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
630{ 630{
631 struct ufs_inode_info *ufsi = UFS_I(inode); 631 struct ufs_inode_info *ufsi = UFS_I(inode);
632 struct super_block *sb = inode->i_sb; 632 struct super_block *sb = inode->i_sb;
633 mode_t mode; 633 umode_t mode;
634 634
635 UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino); 635 UFSD("Reading ufs2 inode, ino %lu\n", inode->i_ino);
636 /* 636 /*
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 639d49162241..38cac199edff 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -70,7 +70,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
70 * If the create succeeds, we fill in the inode information 70 * If the create succeeds, we fill in the inode information
71 * with d_instantiate(). 71 * with d_instantiate().
72 */ 72 */
73static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, 73static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
74 struct nameidata *nd) 74 struct nameidata *nd)
75{ 75{
76 struct inode *inode; 76 struct inode *inode;
@@ -94,7 +94,7 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
94 return err; 94 return err;
95} 95}
96 96
97static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) 97static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
98{ 98{
99 struct inode *inode; 99 struct inode *inode;
100 int err; 100 int err;
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
180 return error; 180 return error;
181} 181}
182 182
183static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) 183static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
184{ 184{
185 struct inode * inode; 185 struct inode * inode;
186 int err = -EMLINK; 186 int err = -EMLINK;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 3915ade6f9a8..5246ee3e5607 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1351,9 +1351,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1351 return 0; 1351 return 0;
1352} 1352}
1353 1353
1354static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) 1354static int ufs_show_options(struct seq_file *seq, struct dentry *root)
1355{ 1355{
1356 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); 1356 struct ufs_sb_info *sbi = UFS_SB(root->d_sb);
1357 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; 1357 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
1358 const struct match_token *tp = tokens; 1358 const struct match_token *tp = tokens;
1359 1359
@@ -1425,7 +1425,6 @@ static struct inode *ufs_alloc_inode(struct super_block *sb)
1425static void ufs_i_callback(struct rcu_head *head) 1425static void ufs_i_callback(struct rcu_head *head)
1426{ 1426{
1427 struct inode *inode = container_of(head, struct inode, i_rcu); 1427 struct inode *inode = container_of(head, struct inode, i_rcu);
1428 INIT_LIST_HEAD(&inode->i_dentry);
1429 kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); 1428 kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
1430} 1429}
1431 1430
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index c26f2bcec264..528750b7e701 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -104,7 +104,7 @@ extern const struct address_space_operations ufs_aops;
104 104
105/* ialloc.c */ 105/* ialloc.c */
106extern void ufs_free_inode (struct inode *inode); 106extern void ufs_free_inode (struct inode *inode);
107extern struct inode * ufs_new_inode (struct inode *, int); 107extern struct inode * ufs_new_inode (struct inode *, umode_t);
108 108
109/* inode.c */ 109/* inode.c */
110extern struct inode *ufs_iget(struct super_block *, unsigned long); 110extern struct inode *ufs_iget(struct super_block *, unsigned long);
diff --git a/fs/xattr.c b/fs/xattr.c
index 67583de8218c..82f43376c7cd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -397,7 +397,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
397 error = mnt_want_write_file(f); 397 error = mnt_want_write_file(f);
398 if (!error) { 398 if (!error) {
399 error = setxattr(dentry, name, value, size, flags); 399 error = setxattr(dentry, name, value, size, flags);
400 mnt_drop_write(f->f_path.mnt); 400 mnt_drop_write_file(f);
401 } 401 }
402 fput(f); 402 fput(f);
403 return error; 403 return error;
@@ -624,7 +624,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
624 error = mnt_want_write_file(f); 624 error = mnt_want_write_file(f);
625 if (!error) { 625 if (!error) {
626 error = removexattr(dentry, name); 626 error = removexattr(dentry, name);
627 mnt_drop_write(f->f_path.mnt); 627 mnt_drop_write_file(f);
628 } 628 }
629 fput(f); 629 fput(f);
630 return error; 630 return error;
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 76e4266d2e7e..ac702a6eab9b 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
39 struct posix_acl_entry *acl_e; 39 struct posix_acl_entry *acl_e;
40 struct posix_acl *acl; 40 struct posix_acl *acl;
41 struct xfs_acl_entry *ace; 41 struct xfs_acl_entry *ace;
42 int count, i; 42 unsigned int count, i;
43 43
44 count = be32_to_cpu(aclp->acl_cnt); 44 count = be32_to_cpu(aclp->acl_cnt);
45 if (count > XFS_ACL_MAX_ENTRIES) 45 if (count > XFS_ACL_MAX_ENTRIES)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index cf0ac056815f..4dff85c7d7eb 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1370,7 +1370,7 @@ restart:
1370 goto restart; 1370 goto restart;
1371 } 1371 }
1372 /* 1372 /*
1373 * clear the LRU reference count so the bufer doesn't get 1373 * clear the LRU reference count so the buffer doesn't get
1374 * ignored in xfs_buf_rele(). 1374 * ignored in xfs_buf_rele().
1375 */ 1375 */
1376 atomic_set(&bp->b_lru_ref, 0); 1376 atomic_set(&bp->b_lru_ref, 0);
@@ -1701,12 +1701,8 @@ xfsbufd(
1701 struct list_head tmp; 1701 struct list_head tmp;
1702 struct blk_plug plug; 1702 struct blk_plug plug;
1703 1703
1704 if (unlikely(freezing(current))) { 1704 if (unlikely(freezing(current)))
1705 set_bit(XBT_FORCE_SLEEP, &target->bt_flags); 1705 try_to_freeze();
1706 refrigerator();
1707 } else {
1708 clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
1709 }
1710 1706
1711 /* sleep for a long time if there is nothing to do. */ 1707 /* sleep for a long time if there is nothing to do. */
1712 if (list_empty(&target->bt_delwri_queue)) 1708 if (list_empty(&target->bt_delwri_queue))
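
The xfsbufd() hunk replaces the hand-rolled XBT_FORCE_SLEEP bookkeeping around refrigerator() with a plain try_to_freeze(), the usual shape for a freezable kernel thread. A hedged kernel-style sketch of that loop is below; it is not buildable on its own and the work step is only a comment.

/*
 * Kernel-style sketch of a freezable kthread main loop using
 * try_to_freeze(), as xfsbufd() now does.
 */
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int example_thread(void *data)
{
        set_freezable();
        while (!kthread_should_stop()) {
                try_to_freeze();        /* park here during suspend if asked to */

                /* ... flush queued work, then sleep until there is more ... */
                schedule_timeout_interruptible(HZ);
        }
        return 0;
}
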
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5bab046e859f..df7ffb0affe7 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -90,8 +90,7 @@ typedef unsigned int xfs_buf_flags_t;
90 { _XBF_DELWRI_Q, "DELWRI_Q" } 90 { _XBF_DELWRI_Q, "DELWRI_Q" }
91 91
92typedef enum { 92typedef enum {
93 XBT_FORCE_SLEEP = 0, 93 XBT_FORCE_FLUSH = 0,
94 XBT_FORCE_FLUSH = 1,
95} xfs_buftarg_flags_t; 94} xfs_buftarg_flags_t;
96 95
97typedef struct xfs_buftarg { 96typedef struct xfs_buftarg {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 25d7280e9f6b..b4ff40b5f918 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -39,20 +39,19 @@
39#include "xfs_qm.h" 39#include "xfs_qm.h"
40#include "xfs_trace.h" 40#include "xfs_trace.h"
41 41
42
43/* 42/*
44 LOCK ORDER 43 * Lock order:
45 44 *
46 inode lock (ilock) 45 * ip->i_lock
47 dquot hash-chain lock (hashlock) 46 * qh->qh_lock
48 xqm dquot freelist lock (freelistlock 47 * qi->qi_dqlist_lock
49 mount's dquot list lock (mplistlock) 48 * dquot->q_qlock (xfs_dqlock() and friends)
50 user dquot lock - lock ordering among dquots is based on the uid or gid 49 * dquot->q_flush (xfs_dqflock() and friends)
51 group dquot lock - similar to udquots. Between the two dquots, the udquot 50 * xfs_Gqm->qm_dqfrlist_lock
52 has to be locked first. 51 *
53 pin lock - the dquot lock must be held to take this lock. 52 * If two dquots need to be locked the order is user before group/project,
54 flush lock - ditto. 53 * otherwise by the lowest id first, see xfs_dqlock2.
55*/ 54 */
56 55
57#ifdef DEBUG 56#ifdef DEBUG
58xfs_buftarg_t *xfs_dqerror_target; 57xfs_buftarg_t *xfs_dqerror_target;
@@ -155,24 +154,6 @@ xfs_qm_dqdestroy(
155} 154}
156 155
157/* 156/*
158 * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
159 */
160STATIC void
161xfs_qm_dqinit_core(
162 xfs_dqid_t id,
163 uint type,
164 xfs_dqblk_t *d)
165{
166 /*
167 * Caller has zero'd the entire dquot 'chunk' already.
168 */
169 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
170 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
171 d->dd_diskdq.d_id = cpu_to_be32(id);
172 d->dd_diskdq.d_flags = type;
173}
174
175/*
176 * If default limits are in force, push them into the dquot now. 157 * If default limits are in force, push them into the dquot now.
177 * We overwrite the dquot limits only if they are zero and this 158 * We overwrite the dquot limits only if they are zero and this
178 * is not the root dquot. 159 * is not the root dquot.
@@ -328,8 +309,13 @@ xfs_qm_init_dquot_blk(
328 curid = id - (id % q->qi_dqperchunk); 309 curid = id - (id % q->qi_dqperchunk);
329 ASSERT(curid >= 0); 310 ASSERT(curid >= 0);
330 memset(d, 0, BBTOB(q->qi_dqchunklen)); 311 memset(d, 0, BBTOB(q->qi_dqchunklen));
331 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) 312 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
332 xfs_qm_dqinit_core(curid, type, d); 313 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
314 d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
315 d->dd_diskdq.d_id = cpu_to_be32(curid);
316 d->dd_diskdq.d_flags = type;
317 }
318
333 xfs_trans_dquot_buf(tp, bp, 319 xfs_trans_dquot_buf(tp, bp,
334 (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : 320 (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
335 ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : 321 ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
@@ -564,36 +550,62 @@ xfs_qm_dqtobp(
564 * Read in the ondisk dquot using dqtobp() then copy it to an incore version, 550 * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
565 * and release the buffer immediately. 551 * and release the buffer immediately.
566 * 552 *
553 * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed.
567 */ 554 */
568/* ARGSUSED */ 555int
569STATIC int
570xfs_qm_dqread( 556xfs_qm_dqread(
571 xfs_trans_t **tpp, 557 struct xfs_mount *mp,
572 xfs_dqid_t id, 558 xfs_dqid_t id,
573 xfs_dquot_t *dqp, /* dquot to get filled in */ 559 uint type,
574 uint flags) 560 uint flags,
561 struct xfs_dquot **O_dqpp)
575{ 562{
576 xfs_disk_dquot_t *ddqp; 563 struct xfs_dquot *dqp;
577 xfs_buf_t *bp; 564 struct xfs_disk_dquot *ddqp;
578 int error; 565 struct xfs_buf *bp;
579 xfs_trans_t *tp; 566 struct xfs_trans *tp = NULL;
567 int error;
568 int cancelflags = 0;
580 569
581 ASSERT(tpp); 570 dqp = xfs_qm_dqinit(mp, id, type);
582 571
583 trace_xfs_dqread(dqp); 572 trace_xfs_dqread(dqp);
584 573
574 if (flags & XFS_QMOPT_DQALLOC) {
575 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
576 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
577 XFS_WRITE_LOG_RES(mp) +
578 /*
579 * Round the chunklen up to the next multiple
580 * of 128 (buf log item chunk size)).
581 */
582 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + 128,
583 0,
584 XFS_TRANS_PERM_LOG_RES,
585 XFS_WRITE_LOG_COUNT);
586 if (error)
587 goto error1;
588 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
589 }
590
585 /* 591 /*
586 * get a pointer to the on-disk dquot and the buffer containing it 592 * get a pointer to the on-disk dquot and the buffer containing it
587 * dqp already knows its own type (GROUP/USER). 593 * dqp already knows its own type (GROUP/USER).
588 */ 594 */
589 if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { 595 error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
590 return (error); 596 if (error) {
597 /*
598 * This can happen if quotas got turned off (ESRCH),
599 * or if the dquot didn't exist on disk and we ask to
600 * allocate (ENOENT).
601 */
602 trace_xfs_dqread_fail(dqp);
603 cancelflags |= XFS_TRANS_ABORT;
604 goto error1;
591 } 605 }
592 tp = *tpp;
593 606
594 /* copy everything from disk dquot to the incore dquot */ 607 /* copy everything from disk dquot to the incore dquot */
595 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); 608 memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
596 ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
597 xfs_qm_dquot_logitem_init(dqp); 609 xfs_qm_dquot_logitem_init(dqp);
598 610
599 /* 611 /*
@@ -622,77 +634,22 @@ xfs_qm_dqread(
622 ASSERT(xfs_buf_islocked(bp)); 634 ASSERT(xfs_buf_islocked(bp));
623 xfs_trans_brelse(tp, bp); 635 xfs_trans_brelse(tp, bp);
624 636
625 return (error);
626}
627
628
629/*
630 * allocate an incore dquot from the kernel heap,
631 * and fill its core with quota information kept on disk.
632 * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
633 * if it wasn't already allocated.
634 */
635STATIC int
636xfs_qm_idtodq(
637 xfs_mount_t *mp,
638 xfs_dqid_t id, /* gid or uid, depending on type */
639 uint type, /* UDQUOT or GDQUOT */
640 uint flags, /* DQALLOC, DQREPAIR */
641 xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */
642{
643 xfs_dquot_t *dqp;
644 int error;
645 xfs_trans_t *tp;
646 int cancelflags=0;
647
648 dqp = xfs_qm_dqinit(mp, id, type);
649 tp = NULL;
650 if (flags & XFS_QMOPT_DQALLOC) {
651 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
652 error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
653 XFS_WRITE_LOG_RES(mp) +
654 BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
655 128,
656 0,
657 XFS_TRANS_PERM_LOG_RES,
658 XFS_WRITE_LOG_COUNT);
659 if (error) {
660 cancelflags = 0;
661 goto error0;
662 }
663 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
664 }
665
666 /*
667 * Read it from disk; xfs_dqread() takes care of
668 * all the necessary initialization of dquot's fields (locks, etc)
669 */
670 if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
671 /*
672 * This can happen if quotas got turned off (ESRCH),
673 * or if the dquot didn't exist on disk and we ask to
674 * allocate (ENOENT).
675 */
676 trace_xfs_dqread_fail(dqp);
677 cancelflags |= XFS_TRANS_ABORT;
678 goto error0;
679 }
680 if (tp) { 637 if (tp) {
681 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) 638 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
682 goto error1; 639 if (error)
640 goto error0;
683 } 641 }
684 642
685 *O_dqpp = dqp; 643 *O_dqpp = dqp;
686 return (0); 644 return error;
687 645
688 error0: 646error1:
689 ASSERT(error);
690 if (tp) 647 if (tp)
691 xfs_trans_cancel(tp, cancelflags); 648 xfs_trans_cancel(tp, cancelflags);
692 error1: 649error0:
693 xfs_qm_dqdestroy(dqp); 650 xfs_qm_dqdestroy(dqp);
694 *O_dqpp = NULL; 651 *O_dqpp = NULL;
695 return (error); 652 return error;
696} 653}
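
The merged xfs_qm_dqread() above folds the old xfs_qm_idtodq() allocation path into the read routine: a transaction is only set up when XFS_QMOPT_DQALLOC is requested, and a single error path cancels that transaction and destroys the half-built dquot. The following is a minimal userspace sketch of that shape only; the demo_* names and the plain int return codes are invented for illustration and are not the XFS APIs.

#include <stdio.h>
#include <stdlib.h>

struct demo_dquot { int id; };
struct demo_trans { int active; };

/* stand-in for the disk read; 'fail' models ENOENT/ESRCH */
static int demo_read(struct demo_trans *tp, struct demo_dquot *dqp, int fail)
{
    (void)tp;
    (void)dqp;
    return fail ? -1 : 0;
}

static int demo_dqread(int id, int alloc, int fail, struct demo_dquot **out)
{
    struct demo_dquot *dqp = calloc(1, sizeof(*dqp));
    struct demo_trans trans = { 0 };
    struct demo_trans *tp = NULL;
    int error;

    dqp->id = id;
    if (alloc) {
        trans.active = 1;       /* models xfs_trans_alloc + xfs_trans_reserve */
        tp = &trans;
    }

    error = demo_read(tp, dqp, fail);
    if (error)
        goto out_cancel;

    *out = dqp;
    return 0;

out_cancel:
    if (tp)
        tp->active = 0;         /* models xfs_trans_cancel() */
    free(dqp);                  /* models xfs_qm_dqdestroy() */
    *out = NULL;
    return error;
}

int main(void)
{
    struct demo_dquot *dqp;

    printf("ok path: %d\n", demo_dqread(1, 1, 0, &dqp));
    free(dqp);
    printf("error path: %d\n", demo_dqread(2, 1, 1, &dqp));
    return 0;
}
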
697 654
698/* 655/*
@@ -710,12 +667,9 @@ xfs_qm_dqlookup(
710 xfs_dquot_t **O_dqpp) 667 xfs_dquot_t **O_dqpp)
711{ 668{
712 xfs_dquot_t *dqp; 669 xfs_dquot_t *dqp;
713 uint flist_locked;
714 670
715 ASSERT(mutex_is_locked(&qh->qh_lock)); 671 ASSERT(mutex_is_locked(&qh->qh_lock));
716 672
717 flist_locked = B_FALSE;
718
719 /* 673 /*
720 * Traverse the hashchain looking for a match 674 * Traverse the hashchain looking for a match
721 */ 675 */
@@ -725,70 +679,31 @@ xfs_qm_dqlookup(
725 * dqlock to look at the id field of the dquot, since the 679 * dqlock to look at the id field of the dquot, since the
726 * id can't be modified without the hashlock anyway. 680 * id can't be modified without the hashlock anyway.
727 */ 681 */
728 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { 682 if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
729 trace_xfs_dqlookup_found(dqp); 683 continue;
730 684
731 /* 685 trace_xfs_dqlookup_found(dqp);
732 * All in core dquots must be on the dqlist of mp
733 */
734 ASSERT(!list_empty(&dqp->q_mplist));
735
736 xfs_dqlock(dqp);
737 if (dqp->q_nrefs == 0) {
738 ASSERT(!list_empty(&dqp->q_freelist));
739 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
740 trace_xfs_dqlookup_want(dqp);
741
742 /*
743 * We may have raced with dqreclaim_one()
744 * (and lost). So, flag that we don't
745 * want the dquot to be reclaimed.
746 */
747 dqp->dq_flags |= XFS_DQ_WANT;
748 xfs_dqunlock(dqp);
749 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
750 xfs_dqlock(dqp);
751 dqp->dq_flags &= ~(XFS_DQ_WANT);
752 }
753 flist_locked = B_TRUE;
754 }
755 686
756 /* 687 xfs_dqlock(dqp);
757 * id couldn't have changed; we had the hashlock all 688 if (dqp->dq_flags & XFS_DQ_FREEING) {
758 * along 689 *O_dqpp = NULL;
759 */ 690 xfs_dqunlock(dqp);
760 ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); 691 return -1;
761 692 }
762 if (flist_locked) {
763 if (dqp->q_nrefs != 0) {
764 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
765 flist_locked = B_FALSE;
766 } else {
767 /* take it off the freelist */
768 trace_xfs_dqlookup_freelist(dqp);
769 list_del_init(&dqp->q_freelist);
770 xfs_Gqm->qm_dqfrlist_cnt--;
771 }
772 }
773 693
774 XFS_DQHOLD(dqp); 694 dqp->q_nrefs++;
775 695
776 if (flist_locked) 696 /*
777 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 697 * move the dquot to the front of the hashchain
778 /* 698 */
779 * move the dquot to the front of the hashchain 699 list_move(&dqp->q_hashlist, &qh->qh_list);
780 */ 700 trace_xfs_dqlookup_done(dqp);
781 ASSERT(mutex_is_locked(&qh->qh_lock)); 701 *O_dqpp = dqp;
782 list_move(&dqp->q_hashlist, &qh->qh_list); 702 return 0;
783 trace_xfs_dqlookup_done(dqp);
784 *O_dqpp = dqp;
785 return 0;
786 }
787 } 703 }
788 704
789 *O_dqpp = NULL; 705 *O_dqpp = NULL;
790 ASSERT(mutex_is_locked(&qh->qh_lock)); 706 return 1;
791 return (1);
792} 707}
793 708
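
The reworked xfs_qm_dqlookup() above now reports three outcomes: 0 for a hit with a reference already taken, 1 for a miss, and -1 when the matching dquot is flagged XFS_DQ_FREEING so the caller can drop its locks, wait briefly and retry. A small standalone sketch of that convention, using invented demo_* types and ignoring locking entirely, might look like this:

#include <stdio.h>

struct demo_dq {
    int             id;
    int             freeing;    /* models XFS_DQ_FREEING */
    int             nrefs;
    struct demo_dq  *next;      /* hash chain link */
};

/* 0 = hit (reference taken), 1 = miss, -1 = match is being torn down */
static int demo_lookup(struct demo_dq *chain, int id, struct demo_dq **out)
{
    struct demo_dq *dq;

    for (dq = chain; dq; dq = dq->next) {
        if (dq->id != id)
            continue;
        if (dq->freeing) {
            *out = NULL;
            return -1;          /* caller backs off and retries */
        }
        dq->nrefs++;            /* grab a reference on the hit */
        *out = dq;
        return 0;
    }
    *out = NULL;
    return 1;
}

int main(void)
{
    struct demo_dq b = { 2, 1, 0, NULL };   /* id 2, being freed */
    struct demo_dq a = { 1, 0, 0, &b };
    struct demo_dq *found;

    printf("%d %d %d\n",
           demo_lookup(&a, 1, &found),      /* 0: hit */
           demo_lookup(&a, 2, &found),      /* -1: freeing */
           demo_lookup(&a, 3, &found));     /* 1: miss */
    return 0;
}
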
794/* 709/*
@@ -829,11 +744,7 @@ xfs_qm_dqget(
829 return (EIO); 744 return (EIO);
830 } 745 }
831 } 746 }
832#endif
833
834 again:
835 747
836#ifdef DEBUG
837 ASSERT(type == XFS_DQ_USER || 748 ASSERT(type == XFS_DQ_USER ||
838 type == XFS_DQ_PROJ || 749 type == XFS_DQ_PROJ ||
839 type == XFS_DQ_GROUP); 750 type == XFS_DQ_GROUP);
@@ -845,13 +756,21 @@ xfs_qm_dqget(
845 ASSERT(ip->i_gdquot == NULL); 756 ASSERT(ip->i_gdquot == NULL);
846 } 757 }
847#endif 758#endif
759
760restart:
848 mutex_lock(&h->qh_lock); 761 mutex_lock(&h->qh_lock);
849 762
850 /* 763 /*
851 * Look in the cache (hashtable). 764 * Look in the cache (hashtable).
852 * The chain is kept locked during lookup. 765 * The chain is kept locked during lookup.
853 */ 766 */
854 if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { 767 switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
768 case -1:
769 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
770 mutex_unlock(&h->qh_lock);
771 delay(1);
772 goto restart;
773 case 0:
855 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); 774 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
856 /* 775 /*
857 * The dquot was found, moved to the front of the chain, 776 * The dquot was found, moved to the front of the chain,
@@ -862,9 +781,11 @@ xfs_qm_dqget(
862 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); 781 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
863 mutex_unlock(&h->qh_lock); 782 mutex_unlock(&h->qh_lock);
864 trace_xfs_dqget_hit(*O_dqpp); 783 trace_xfs_dqget_hit(*O_dqpp);
865 return (0); /* success */ 784 return 0; /* success */
785 default:
786 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
787 break;
866 } 788 }
867 XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
868 789
869 /* 790 /*
870 * Dquot cache miss. We don't want to keep the inode lock across 791 * Dquot cache miss. We don't want to keep the inode lock across
@@ -882,41 +803,18 @@ xfs_qm_dqget(
882 version = h->qh_version; 803 version = h->qh_version;
883 mutex_unlock(&h->qh_lock); 804 mutex_unlock(&h->qh_lock);
884 805
885 /* 806 error = xfs_qm_dqread(mp, id, type, flags, &dqp);
886 * Allocate the dquot on the kernel heap, and read the ondisk
887 * portion off the disk. Also, do all the necessary initialization
888 * This can return ENOENT if dquot didn't exist on disk and we didn't
889 * ask it to allocate; ESRCH if quotas got turned off suddenly.
890 */
891 if ((error = xfs_qm_idtodq(mp, id, type,
892 flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
893 XFS_QMOPT_DOWARN),
894 &dqp))) {
895 if (ip)
896 xfs_ilock(ip, XFS_ILOCK_EXCL);
897 return (error);
898 }
899 807
900 /* 808 if (ip)
901 * See if this is mount code calling to look at the overall quota limits 809 xfs_ilock(ip, XFS_ILOCK_EXCL);
902 * which are stored in the id == 0 user or group's dquot. 810
903 * Since we may not have done a quotacheck by this point, just return 811 if (error)
904 * the dquot without attaching it to any hashtables, lists, etc, or even 812 return error;
905 * taking a reference.
906 * The caller must dqdestroy this once done.
907 */
908 if (flags & XFS_QMOPT_DQSUSER) {
909 ASSERT(id == 0);
910 ASSERT(! ip);
911 goto dqret;
912 }
913 813
914 /* 814 /*
915 * Dquot lock comes after hashlock in the lock ordering 815 * Dquot lock comes after hashlock in the lock ordering
916 */ 816 */
917 if (ip) { 817 if (ip) {
918 xfs_ilock(ip, XFS_ILOCK_EXCL);
919
920 /* 818 /*
921 * A dquot could be attached to this inode by now, since 819 * A dquot could be attached to this inode by now, since
922 * we had dropped the ilock. 820 * we had dropped the ilock.
@@ -961,16 +859,21 @@ xfs_qm_dqget(
961 * lock order between the two dquots here since dqp isn't 859 * lock order between the two dquots here since dqp isn't
962 * on any findable lists yet. 860 * on any findable lists yet.
963 */ 861 */
964 if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { 862 switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
863 case 0:
864 case -1:
965 /* 865 /*
966 * Duplicate found. Just throw away the new dquot 866 * Duplicate found, either in cache or on its way out.
967 * and start over. 867 * Just throw away the new dquot and start over.
968 */ 868 */
969 xfs_qm_dqput(tmpdqp); 869 if (tmpdqp)
870 xfs_qm_dqput(tmpdqp);
970 mutex_unlock(&h->qh_lock); 871 mutex_unlock(&h->qh_lock);
971 xfs_qm_dqdestroy(dqp); 872 xfs_qm_dqdestroy(dqp);
972 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); 873 XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
973 goto again; 874 goto restart;
875 default:
876 break;
974 } 877 }
975 } 878 }
976 879
@@ -1015,67 +918,49 @@ xfs_qm_dqget(
1015 */ 918 */
1016void 919void
1017xfs_qm_dqput( 920xfs_qm_dqput(
1018 xfs_dquot_t *dqp) 921 struct xfs_dquot *dqp)
1019{ 922{
1020 xfs_dquot_t *gdqp; 923 struct xfs_dquot *gdqp;
1021 924
1022 ASSERT(dqp->q_nrefs > 0); 925 ASSERT(dqp->q_nrefs > 0);
1023 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 926 ASSERT(XFS_DQ_IS_LOCKED(dqp));
1024 927
1025 trace_xfs_dqput(dqp); 928 trace_xfs_dqput(dqp);
1026 929
1027 if (dqp->q_nrefs != 1) { 930recurse:
1028 dqp->q_nrefs--; 931 if (--dqp->q_nrefs > 0) {
1029 xfs_dqunlock(dqp); 932 xfs_dqunlock(dqp);
1030 return; 933 return;
1031 } 934 }
1032 935
936 trace_xfs_dqput_free(dqp);
937
938 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
939 if (list_empty(&dqp->q_freelist)) {
940 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
941 xfs_Gqm->qm_dqfrlist_cnt++;
942 }
943 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
944
1033 /* 945 /*
1034 * drop the dqlock and acquire the freelist and dqlock 946 * If we just added a udquot to the freelist, then we want to release
1035 * in the right order; but try to get it out-of-order first 947 * the gdquot reference that it (probably) has. Otherwise it'll keep
948 * the gdquot from getting reclaimed.
1036 */ 949 */
1037 if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { 950 gdqp = dqp->q_gdquot;
1038 trace_xfs_dqput_wait(dqp); 951 if (gdqp) {
1039 xfs_dqunlock(dqp); 952 xfs_dqlock(gdqp);
1040 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 953 dqp->q_gdquot = NULL;
1041 xfs_dqlock(dqp);
1042 } 954 }
955 xfs_dqunlock(dqp);
1043 956
1044 while (1) { 957 /*
1045 gdqp = NULL; 958 * If we had a group quota hint, release it now.
1046 959 */
1047 /* We can't depend on nrefs being == 1 here */ 960 if (gdqp) {
1048 if (--dqp->q_nrefs == 0) {
1049 trace_xfs_dqput_free(dqp);
1050
1051 list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
1052 xfs_Gqm->qm_dqfrlist_cnt++;
1053
1054 /*
1055 * If we just added a udquot to the freelist, then
1056 * we want to release the gdquot reference that
1057 * it (probably) has. Otherwise it'll keep the
1058 * gdquot from getting reclaimed.
1059 */
1060 if ((gdqp = dqp->q_gdquot)) {
1061 /*
1062 * Avoid a recursive dqput call
1063 */
1064 xfs_dqlock(gdqp);
1065 dqp->q_gdquot = NULL;
1066 }
1067 }
1068 xfs_dqunlock(dqp);
1069
1070 /*
1071 * If we had a group quota inside the user quota as a hint,
1072 * release it now.
1073 */
1074 if (! gdqp)
1075 break;
1076 dqp = gdqp; 961 dqp = gdqp;
962 goto recurse;
1077 } 963 }
1078 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1079} 964}
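
The rewritten xfs_qm_dqput() above replaces the old while(1) loop with a goto back to the top (the "recurse:" label) for the case where dropping the last reference on the user dquot also requires releasing its group-quota hint. A hedged userspace model of the same iterative release follows; the demo_ref type is invented and locking is omitted, so this is a sketch of the control flow, not the kernel code.

#include <stdio.h>

struct demo_ref {
    int             nrefs;
    struct demo_ref *hint;      /* models dqp->q_gdquot */
};

static void demo_put(struct demo_ref *obj)
{
    while (obj) {
        struct demo_ref *next;

        if (--obj->nrefs > 0)
            return;

        /* last reference: also drop the hint reference we held */
        next = obj->hint;
        obj->hint = NULL;
        obj = next;
    }
}

int main(void)
{
    struct demo_ref g = { 2, NULL };    /* "group" dquot, 2 references */
    struct demo_ref u = { 1, &g };      /* "user" dquot holding a hint */

    demo_put(&u);
    printf("u.nrefs=%d g.nrefs=%d\n", u.nrefs, g.nrefs);   /* 0 and 1 */
    return 0;
}

Iterating instead of recursing keeps the stack depth bounded even if hint references were ever chained more than one level deep.
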
1080 965
1081/* 966/*
@@ -1169,7 +1054,7 @@ xfs_qm_dqflush(
1169 * If not dirty, or it's pinned and we are not supposed to block, nada. 1054 * If not dirty, or it's pinned and we are not supposed to block, nada.
1170 */ 1055 */
1171 if (!XFS_DQ_IS_DIRTY(dqp) || 1056 if (!XFS_DQ_IS_DIRTY(dqp) ||
1172 (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { 1057 ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) {
1173 xfs_dqfunlock(dqp); 1058 xfs_dqfunlock(dqp);
1174 return 0; 1059 return 0;
1175 } 1060 }
@@ -1257,40 +1142,17 @@ xfs_qm_dqflush(
1257 1142
1258} 1143}
1259 1144
1260int
1261xfs_qm_dqlock_nowait(
1262 xfs_dquot_t *dqp)
1263{
1264 return mutex_trylock(&dqp->q_qlock);
1265}
1266
1267void
1268xfs_dqlock(
1269 xfs_dquot_t *dqp)
1270{
1271 mutex_lock(&dqp->q_qlock);
1272}
1273
1274void 1145void
1275xfs_dqunlock( 1146xfs_dqunlock(
1276 xfs_dquot_t *dqp) 1147 xfs_dquot_t *dqp)
1277{ 1148{
1278 mutex_unlock(&(dqp->q_qlock)); 1149 xfs_dqunlock_nonotify(dqp);
1279 if (dqp->q_logitem.qli_dquot == dqp) { 1150 if (dqp->q_logitem.qli_dquot == dqp) {
1280 /* Once was dqp->q_mount, but might just have been cleared */
1281 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, 1151 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
1282 (xfs_log_item_t*)&(dqp->q_logitem)); 1152 &dqp->q_logitem.qli_item);
1283 } 1153 }
1284} 1154}
1285 1155
1286
1287void
1288xfs_dqunlock_nonotify(
1289 xfs_dquot_t *dqp)
1290{
1291 mutex_unlock(&(dqp->q_qlock));
1292}
1293
1294/* 1156/*
1295 * Lock two xfs_dquot structures. 1157 * Lock two xfs_dquot structures.
1296 * 1158 *
@@ -1319,43 +1181,18 @@ xfs_dqlock2(
1319 } 1181 }
1320} 1182}
1321 1183
1322
1323/* 1184/*
1324 * Take a dquot out of the mount's dqlist as well as the hashlist. 1185 * Take a dquot out of the mount's dqlist as well as the hashlist. This is
1325 * This is called via unmount as well as quotaoff, and the purge 1186 * called via unmount as well as quotaoff, and the purge will always succeed.
1326 * will always succeed unless there are soft (temp) references
1327 * outstanding.
1328 *
1329 * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
1330 * that we're returning! XXXsup - not cool.
1331 */ 1187 */
1332/* ARGSUSED */ 1188void
1333int
1334xfs_qm_dqpurge( 1189xfs_qm_dqpurge(
1335 xfs_dquot_t *dqp) 1190 struct xfs_dquot *dqp)
1336{ 1191{
1337 xfs_dqhash_t *qh = dqp->q_hash; 1192 struct xfs_mount *mp = dqp->q_mount;
1338 xfs_mount_t *mp = dqp->q_mount; 1193 struct xfs_dqhash *qh = dqp->q_hash;
1339
1340 ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
1341 ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
1342 1194
1343 xfs_dqlock(dqp); 1195 xfs_dqlock(dqp);
1344 /*
1345 * We really can't afford to purge a dquot that is
1346 * referenced, because these are hard refs.
1347 * It shouldn't happen in general because we went thru _all_ inodes in
1348 * dqrele_all_inodes before calling this and didn't let the mountlock go.
1349 * However it is possible that we have dquots with temporary
1350 * references that are not attached to an inode. e.g. see xfs_setattr().
1351 */
1352 if (dqp->q_nrefs != 0) {
1353 xfs_dqunlock(dqp);
1354 mutex_unlock(&dqp->q_hash->qh_lock);
1355 return (1);
1356 }
1357
1358 ASSERT(!list_empty(&dqp->q_freelist));
1359 1196
1360 /* 1197 /*
1361 * If we're turning off quotas, we have to make sure that, for 1198 * If we're turning off quotas, we have to make sure that, for
@@ -1370,23 +1207,18 @@ xfs_qm_dqpurge(
1370 * Block on the flush lock after nudging dquot buffer, 1207 * Block on the flush lock after nudging dquot buffer,
1371 * if it is incore. 1208 * if it is incore.
1372 */ 1209 */
1373 xfs_qm_dqflock_pushbuf_wait(dqp); 1210 xfs_dqflock_pushbuf_wait(dqp);
1374 } 1211 }
1375 1212
1376 /* 1213 /*
1377 * XXXIf we're turning this type of quotas off, we don't care 1214 * If we are turning this type of quotas off, we don't care
1378 * about the dirty metadata sitting in this dquot. OTOH, if 1215 * about the dirty metadata sitting in this dquot. OTOH, if
1379 * we're unmounting, we do care, so we flush it and wait. 1216 * we're unmounting, we do care, so we flush it and wait.
1380 */ 1217 */
1381 if (XFS_DQ_IS_DIRTY(dqp)) { 1218 if (XFS_DQ_IS_DIRTY(dqp)) {
1382 int error; 1219 int error;
1383 1220
1384 /* dqflush unlocks dqflock */
1385 /* 1221 /*
1386 * Given that dqpurge is a very rare occurrence, it is OK
1387 * that we're holding the hashlist and mplist locks
1388 * across the disk write. But, ... XXXsup
1389 *
1390 * We don't care about getting disk errors here. We need 1222 * We don't care about getting disk errors here. We need
1391 * to purge this dquot anyway, so we go ahead regardless. 1223 * to purge this dquot anyway, so we go ahead regardless.
1392 */ 1224 */
@@ -1396,38 +1228,44 @@ xfs_qm_dqpurge(
1396 __func__, dqp); 1228 __func__, dqp);
1397 xfs_dqflock(dqp); 1229 xfs_dqflock(dqp);
1398 } 1230 }
1231
1399 ASSERT(atomic_read(&dqp->q_pincount) == 0); 1232 ASSERT(atomic_read(&dqp->q_pincount) == 0);
1400 ASSERT(XFS_FORCED_SHUTDOWN(mp) || 1233 ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1401 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); 1234 !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1402 1235
1236 xfs_dqfunlock(dqp);
1237 xfs_dqunlock(dqp);
1238
1239 mutex_lock(&qh->qh_lock);
1403 list_del_init(&dqp->q_hashlist); 1240 list_del_init(&dqp->q_hashlist);
1404 qh->qh_version++; 1241 qh->qh_version++;
1242 mutex_unlock(&qh->qh_lock);
1243
1244 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1405 list_del_init(&dqp->q_mplist); 1245 list_del_init(&dqp->q_mplist);
1406 mp->m_quotainfo->qi_dqreclaims++; 1246 mp->m_quotainfo->qi_dqreclaims++;
1407 mp->m_quotainfo->qi_dquots--; 1247 mp->m_quotainfo->qi_dquots--;
1248 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1249
1408 /* 1250 /*
1409 * XXX Move this to the front of the freelist, if we can get the 1251 * We move dquots to the freelist as soon as their reference count
1410 * freelist lock. 1252 * hits zero, so it really should be on the freelist here.
1411 */ 1253 */
1254 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1412 ASSERT(!list_empty(&dqp->q_freelist)); 1255 ASSERT(!list_empty(&dqp->q_freelist));
1256 list_del_init(&dqp->q_freelist);
1257 xfs_Gqm->qm_dqfrlist_cnt--;
1258 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1413 1259
1414 dqp->q_mount = NULL; 1260 xfs_qm_dqdestroy(dqp);
1415 dqp->q_hash = NULL;
1416 dqp->dq_flags = XFS_DQ_INACTIVE;
1417 memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1418 xfs_dqfunlock(dqp);
1419 xfs_dqunlock(dqp);
1420 mutex_unlock(&qh->qh_lock);
1421 return (0);
1422} 1261}
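
xfs_qm_dqpurge() now takes each list lock only around the removal from that particular list - hash chain, per-mount list, then global freelist - instead of requiring the caller to hold the hashlist and mplist locks across the whole teardown. Below is a simplified sketch of that one-lock-at-a-time ordering using pthread mutexes; the demo_* names and the tiny list helper are invented stand-ins, not the kernel primitives.

#include <pthread.h>
#include <stdlib.h>

struct node { struct node *prev, *next; };

static void node_del(struct node *n)
{
    if (n->prev)
        n->prev->next = n->next;
    if (n->next)
        n->next->prev = n->prev;
    n->prev = n->next = NULL;
}

struct demo_dq {
    struct node hash_link;
    struct node mount_link;
    struct node free_link;
};

static pthread_mutex_t hash_lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t mount_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t free_lock  = PTHREAD_MUTEX_INITIALIZER;

static void demo_purge(struct demo_dq *dq)
{
    pthread_mutex_lock(&hash_lock);
    node_del(&dq->hash_link);           /* off the hash chain */
    pthread_mutex_unlock(&hash_lock);

    pthread_mutex_lock(&mount_lock);
    node_del(&dq->mount_link);          /* off the per-mount list */
    pthread_mutex_unlock(&mount_lock);

    pthread_mutex_lock(&free_lock);
    node_del(&dq->free_link);           /* off the free list */
    pthread_mutex_unlock(&free_lock);

    free(dq);                           /* models xfs_qm_dqdestroy() */
}

int main(void)
{
    struct demo_dq *dq = calloc(1, sizeof(*dq));

    if (dq)
        demo_purge(dq);
    return 0;
}

This lock-at-a-time teardown is safe here because the dquot is already unreferenced and flagged XFS_DQ_FREEING before purge runs, and the lookup path above refuses dquots carrying that flag.
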
1423 1262
1424
1425/* 1263/*
1426 * Give the buffer a little push if it is incore and 1264 * Give the buffer a little push if it is incore and
1427 * wait on the flush lock. 1265 * wait on the flush lock.
1428 */ 1266 */
1429void 1267void
1430xfs_qm_dqflock_pushbuf_wait( 1268xfs_dqflock_pushbuf_wait(
1431 xfs_dquot_t *dqp) 1269 xfs_dquot_t *dqp)
1432{ 1270{
1433 xfs_mount_t *mp = dqp->q_mount; 1271 xfs_mount_t *mp = dqp->q_mount;
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 34b7e945dbfa..a1d91d8f1802 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -80,8 +80,6 @@ enum {
80 XFS_QLOCK_NESTED, 80 XFS_QLOCK_NESTED,
81}; 81};
82 82
83#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
84
85/* 83/*
86 * Manage the q_flush completion queue embedded in the dquot. This completion 84 * Manage the q_flush completion queue embedded in the dquot. This completion
87 * queue synchronizes processes attempting to flush the in-core dquot back to 85 * queue synchronizes processes attempting to flush the in-core dquot back to
@@ -102,6 +100,21 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
102 complete(&dqp->q_flush); 100 complete(&dqp->q_flush);
103} 101}
104 102
103static inline int xfs_dqlock_nowait(struct xfs_dquot *dqp)
104{
105 return mutex_trylock(&dqp->q_qlock);
106}
107
108static inline void xfs_dqlock(struct xfs_dquot *dqp)
109{
110 mutex_lock(&dqp->q_qlock);
111}
112
113static inline void xfs_dqunlock_nonotify(struct xfs_dquot *dqp)
114{
115 mutex_unlock(&dqp->q_qlock);
116}
117
105#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) 118#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
106#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) 119#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
107#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) 120#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
@@ -116,12 +129,12 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
116 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ 129 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
117 (XFS_IS_OQUOTA_ON((d)->q_mount)))) 130 (XFS_IS_OQUOTA_ON((d)->q_mount))))
118 131
132extern int xfs_qm_dqread(struct xfs_mount *, xfs_dqid_t, uint,
133 uint, struct xfs_dquot **);
119extern void xfs_qm_dqdestroy(xfs_dquot_t *); 134extern void xfs_qm_dqdestroy(xfs_dquot_t *);
120extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 135extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
121extern int xfs_qm_dqpurge(xfs_dquot_t *); 136extern void xfs_qm_dqpurge(xfs_dquot_t *);
122extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 137extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
123extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
124extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
125extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, 138extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
126 xfs_disk_dquot_t *); 139 xfs_disk_dquot_t *);
127extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, 140extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
@@ -129,9 +142,17 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
129extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, 142extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
130 xfs_dqid_t, uint, uint, xfs_dquot_t **); 143 xfs_dqid_t, uint, uint, xfs_dquot_t **);
131extern void xfs_qm_dqput(xfs_dquot_t *); 144extern void xfs_qm_dqput(xfs_dquot_t *);
132extern void xfs_dqlock(xfs_dquot_t *); 145
133extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); 146extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
134extern void xfs_dqunlock(xfs_dquot_t *); 147extern void xfs_dqunlock(struct xfs_dquot *);
135extern void xfs_dqunlock_nonotify(xfs_dquot_t *); 148extern void xfs_dqflock_pushbuf_wait(struct xfs_dquot *dqp);
149
150static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
151{
152 xfs_dqlock(dqp);
153 dqp->q_nrefs++;
154 xfs_dqunlock(dqp);
155 return dqp;
156}
136 157
137#endif /* __XFS_DQUOT_H__ */ 158#endif /* __XFS_DQUOT_H__ */
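
The header now exports xfs_qm_dqhold(), replacing the old XFS_DQHOLD() macro with an inline that locks the dquot, bumps q_nrefs and hands the pointer back, so callers can take a hint reference in a single expression. A compilable userspace sketch of that helper shape follows; the demo_* names are invented and a pthread mutex stands in for the dquot lock.

#include <pthread.h>
#include <stdio.h>

struct demo_dq {
    pthread_mutex_t lock;
    int             nrefs;
};

static struct demo_dq *demo_hold(struct demo_dq *dq)
{
    pthread_mutex_lock(&dq->lock);
    dq->nrefs++;
    pthread_mutex_unlock(&dq->lock);
    return dq;      /* lets callers write hint = demo_hold(dq) */
}

int main(void)
{
    struct demo_dq dq = { .lock = PTHREAD_MUTEX_INITIALIZER, .nrefs = 1 };
    struct demo_dq *hint = demo_hold(&dq);

    printf("nrefs=%d same=%d\n", dq.nrefs, hint == &dq);
    return 0;
}
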
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 0dee0b71029d..34baeae45265 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -73,7 +73,6 @@ xfs_qm_dquot_logitem_format(
73 logvec->i_len = sizeof(xfs_disk_dquot_t); 73 logvec->i_len = sizeof(xfs_disk_dquot_t);
74 logvec->i_type = XLOG_REG_TYPE_DQUOT; 74 logvec->i_type = XLOG_REG_TYPE_DQUOT;
75 75
76 ASSERT(2 == lip->li_desc->lid_size);
77 qlip->qli_format.qlf_size = 2; 76 qlip->qli_format.qlf_size = 2;
78 77
79} 78}
@@ -134,7 +133,7 @@ xfs_qm_dquot_logitem_push(
134 * lock without sleeping, then there must not have been 133 * lock without sleeping, then there must not have been
135 * anyone in the process of flushing the dquot. 134 * anyone in the process of flushing the dquot.
136 */ 135 */
137 error = xfs_qm_dqflush(dqp, 0); 136 error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
138 if (error) 137 if (error)
139 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", 138 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
140 __func__, error, dqp); 139 __func__, error, dqp);
@@ -237,7 +236,7 @@ xfs_qm_dquot_logitem_trylock(
237 if (atomic_read(&dqp->q_pincount) > 0) 236 if (atomic_read(&dqp->q_pincount) > 0)
238 return XFS_ITEM_PINNED; 237 return XFS_ITEM_PINNED;
239 238
240 if (!xfs_qm_dqlock_nowait(dqp)) 239 if (!xfs_dqlock_nowait(dqp))
241 return XFS_ITEM_LOCKED; 240 return XFS_ITEM_LOCKED;
242 241
243 if (!xfs_dqflock_nowait(dqp)) { 242 if (!xfs_dqflock_nowait(dqp)) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 753ed9b5c70b..f675f3d9d7b3 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -209,10 +209,10 @@ xfs_file_fsync(
209 209
210 /* 210 /*
211 * First check if the VFS inode is marked dirty. All the dirtying 211 * First check if the VFS inode is marked dirty. All the dirtying
212 * of non-transactional updates no goes through mark_inode_dirty*, 212 * of non-transactional updates do not go through mark_inode_dirty*,
213 * which allows us to distinguish beteeen pure timestamp updates 213 * which allows us to distinguish between pure timestamp updates
214 * and i_size updates which need to be caught for fdatasync. 214 * and i_size updates which need to be caught for fdatasync.
215 * After that also theck for the dirty state in the XFS inode, which 215 * After that also check for the dirty state in the XFS inode, which
216 * might gets cleared when the inode gets written out via the AIL 216 * might gets cleared when the inode gets written out via the AIL
217 * or xfs_iflush_cluster. 217 * or xfs_iflush_cluster.
218 */ 218 */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 169380e66057..dad1a31aa4fc 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -447,7 +447,7 @@ STATIC xfs_buf_t * /* allocation group buffer */
447xfs_ialloc_ag_select( 447xfs_ialloc_ag_select(
448 xfs_trans_t *tp, /* transaction pointer */ 448 xfs_trans_t *tp, /* transaction pointer */
449 xfs_ino_t parent, /* parent directory inode number */ 449 xfs_ino_t parent, /* parent directory inode number */
450 mode_t mode, /* bits set to indicate file type */ 450 umode_t mode, /* bits set to indicate file type */
451 int okalloc) /* ok to allocate more space */ 451 int okalloc) /* ok to allocate more space */
452{ 452{
453 xfs_buf_t *agbp; /* allocation group header buffer */ 453 xfs_buf_t *agbp; /* allocation group header buffer */
@@ -640,7 +640,7 @@ int
640xfs_dialloc( 640xfs_dialloc(
641 xfs_trans_t *tp, /* transaction pointer */ 641 xfs_trans_t *tp, /* transaction pointer */
642 xfs_ino_t parent, /* parent inode (directory) */ 642 xfs_ino_t parent, /* parent inode (directory) */
643 mode_t mode, /* mode bits for new inode */ 643 umode_t mode, /* mode bits for new inode */
644 int okalloc, /* ok to allocate more space */ 644 int okalloc, /* ok to allocate more space */
645 xfs_buf_t **IO_agbp, /* in/out ag header's buffer */ 645 xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
646 boolean_t *alloc_done, /* true if we needed to replenish 646 boolean_t *alloc_done, /* true if we needed to replenish
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index bb5385475e1f..666a037398d6 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -81,7 +81,7 @@ int /* error */
81xfs_dialloc( 81xfs_dialloc(
82 struct xfs_trans *tp, /* transaction pointer */ 82 struct xfs_trans *tp, /* transaction pointer */
83 xfs_ino_t parent, /* parent inode (directory) */ 83 xfs_ino_t parent, /* parent inode (directory) */
84 mode_t mode, /* mode bits for new inode */ 84 umode_t mode, /* mode bits for new inode */
85 int okalloc, /* ok to allocate more space */ 85 int okalloc, /* ok to allocate more space */
86 struct xfs_buf **agbp, /* buf for a.g. inode header */ 86 struct xfs_buf **agbp, /* buf for a.g. inode header */
87 boolean_t *alloc_done, /* an allocation was done to replenish 87 boolean_t *alloc_done, /* an allocation was done to replenish
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0fa98b1c70ea..3960a066d7ff 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -107,7 +107,6 @@ xfs_inode_free_callback(
107 struct inode *inode = container_of(head, struct inode, i_rcu); 107 struct inode *inode = container_of(head, struct inode, i_rcu);
108 struct xfs_inode *ip = XFS_I(inode); 108 struct xfs_inode *ip = XFS_I(inode);
109 109
110 INIT_LIST_HEAD(&inode->i_dentry);
111 kmem_zone_free(xfs_inode_zone, ip); 110 kmem_zone_free(xfs_inode_zone, ip);
112} 111}
113 112
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 755ee8164880..9dda7cc32848 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -961,7 +961,7 @@ int
961xfs_ialloc( 961xfs_ialloc(
962 xfs_trans_t *tp, 962 xfs_trans_t *tp,
963 xfs_inode_t *pip, 963 xfs_inode_t *pip,
964 mode_t mode, 964 umode_t mode,
965 xfs_nlink_t nlink, 965 xfs_nlink_t nlink,
966 xfs_dev_t rdev, 966 xfs_dev_t rdev,
967 prid_t prid, 967 prid_t prid,
@@ -1002,7 +1002,7 @@ xfs_ialloc(
1002 return error; 1002 return error;
1003 ASSERT(ip != NULL); 1003 ASSERT(ip != NULL);
1004 1004
1005 ip->i_d.di_mode = (__uint16_t)mode; 1005 ip->i_d.di_mode = mode;
1006 ip->i_d.di_onlink = 0; 1006 ip->i_d.di_onlink = 0;
1007 ip->i_d.di_nlink = nlink; 1007 ip->i_d.di_nlink = nlink;
1008 ASSERT(ip->i_d.di_nlink == nlink); 1008 ASSERT(ip->i_d.di_nlink == nlink);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b4cd4739f98e..f0e6b151ba37 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -481,7 +481,7 @@ void xfs_inode_free(struct xfs_inode *ip);
481/* 481/*
482 * xfs_inode.c prototypes. 482 * xfs_inode.c prototypes.
483 */ 483 */
484int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, 484int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
485 xfs_nlink_t, xfs_dev_t, prid_t, int, 485 xfs_nlink_t, xfs_dev_t, prid_t, int,
486 struct xfs_buf **, boolean_t *, xfs_inode_t **); 486 struct xfs_buf **, boolean_t *, xfs_inode_t **);
487 487
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index abaafdbb3e65..cfd6c7f8cc3c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -437,7 +437,6 @@ xfs_inode_item_format(
437 * Assert that no attribute-related log flags are set. 437 * Assert that no attribute-related log flags are set.
438 */ 438 */
439 if (!XFS_IFORK_Q(ip)) { 439 if (!XFS_IFORK_Q(ip)) {
440 ASSERT(nvecs == lip->li_desc->lid_size);
441 iip->ili_format.ilf_size = nvecs; 440 iip->ili_format.ilf_size = nvecs;
442 ASSERT(!(iip->ili_format.ilf_fields & 441 ASSERT(!(iip->ili_format.ilf_fields &
443 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT))); 442 (XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT)));
@@ -521,7 +520,6 @@ xfs_inode_item_format(
521 break; 520 break;
522 } 521 }
523 522
524 ASSERT(nvecs == lip->li_desc->lid_size);
525 iip->ili_format.ilf_size = nvecs; 523 iip->ili_format.ilf_size = nvecs;
526} 524}
527 525
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d99a90518909..76f3ca5cfc36 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -559,23 +559,23 @@ xfs_attrmulti_by_handle(
559 ops[i].am_flags); 559 ops[i].am_flags);
560 break; 560 break;
561 case ATTR_OP_SET: 561 case ATTR_OP_SET:
562 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 562 ops[i].am_error = mnt_want_write_file(parfilp);
563 if (ops[i].am_error) 563 if (ops[i].am_error)
564 break; 564 break;
565 ops[i].am_error = xfs_attrmulti_attr_set( 565 ops[i].am_error = xfs_attrmulti_attr_set(
566 dentry->d_inode, attr_name, 566 dentry->d_inode, attr_name,
567 ops[i].am_attrvalue, ops[i].am_length, 567 ops[i].am_attrvalue, ops[i].am_length,
568 ops[i].am_flags); 568 ops[i].am_flags);
569 mnt_drop_write(parfilp->f_path.mnt); 569 mnt_drop_write_file(parfilp);
570 break; 570 break;
571 case ATTR_OP_REMOVE: 571 case ATTR_OP_REMOVE:
572 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 572 ops[i].am_error = mnt_want_write_file(parfilp);
573 if (ops[i].am_error) 573 if (ops[i].am_error)
574 break; 574 break;
575 ops[i].am_error = xfs_attrmulti_attr_remove( 575 ops[i].am_error = xfs_attrmulti_attr_remove(
576 dentry->d_inode, attr_name, 576 dentry->d_inode, attr_name,
577 ops[i].am_flags); 577 ops[i].am_flags);
578 mnt_drop_write(parfilp->f_path.mnt); 578 mnt_drop_write_file(parfilp);
579 break; 579 break;
580 default: 580 default:
581 ops[i].am_error = EINVAL; 581 ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 54e623bfbb85..f9ccb7b7c043 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -454,23 +454,23 @@ xfs_compat_attrmulti_by_handle(
454 &ops[i].am_length, ops[i].am_flags); 454 &ops[i].am_length, ops[i].am_flags);
455 break; 455 break;
456 case ATTR_OP_SET: 456 case ATTR_OP_SET:
457 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 457 ops[i].am_error = mnt_want_write_file(parfilp);
458 if (ops[i].am_error) 458 if (ops[i].am_error)
459 break; 459 break;
460 ops[i].am_error = xfs_attrmulti_attr_set( 460 ops[i].am_error = xfs_attrmulti_attr_set(
461 dentry->d_inode, attr_name, 461 dentry->d_inode, attr_name,
462 compat_ptr(ops[i].am_attrvalue), 462 compat_ptr(ops[i].am_attrvalue),
463 ops[i].am_length, ops[i].am_flags); 463 ops[i].am_length, ops[i].am_flags);
464 mnt_drop_write(parfilp->f_path.mnt); 464 mnt_drop_write_file(parfilp);
465 break; 465 break;
466 case ATTR_OP_REMOVE: 466 case ATTR_OP_REMOVE:
467 ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); 467 ops[i].am_error = mnt_want_write_file(parfilp);
468 if (ops[i].am_error) 468 if (ops[i].am_error)
469 break; 469 break;
470 ops[i].am_error = xfs_attrmulti_attr_remove( 470 ops[i].am_error = xfs_attrmulti_attr_remove(
471 dentry->d_inode, attr_name, 471 dentry->d_inode, attr_name,
472 ops[i].am_flags); 472 ops[i].am_flags);
473 mnt_drop_write(parfilp->f_path.mnt); 473 mnt_drop_write_file(parfilp);
474 break; 474 break;
475 default: 475 default:
476 ops[i].am_error = EINVAL; 476 ops[i].am_error = EINVAL;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 23ce927973a4..f9babd179223 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -168,7 +168,7 @@ STATIC int
168xfs_vn_mknod( 168xfs_vn_mknod(
169 struct inode *dir, 169 struct inode *dir,
170 struct dentry *dentry, 170 struct dentry *dentry,
171 int mode, 171 umode_t mode,
172 dev_t rdev) 172 dev_t rdev)
173{ 173{
174 struct inode *inode; 174 struct inode *inode;
@@ -231,7 +231,7 @@ STATIC int
231xfs_vn_create( 231xfs_vn_create(
232 struct inode *dir, 232 struct inode *dir,
233 struct dentry *dentry, 233 struct dentry *dentry,
234 int mode, 234 umode_t mode,
235 struct nameidata *nd) 235 struct nameidata *nd)
236{ 236{
237 return xfs_vn_mknod(dir, dentry, mode, 0); 237 return xfs_vn_mknod(dir, dentry, mode, 0);
@@ -241,7 +241,7 @@ STATIC int
241xfs_vn_mkdir( 241xfs_vn_mkdir(
242 struct inode *dir, 242 struct inode *dir,
243 struct dentry *dentry, 243 struct dentry *dentry,
244 int mode) 244 umode_t mode)
245{ 245{
246 return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); 246 return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
247} 247}
@@ -366,7 +366,7 @@ xfs_vn_symlink(
366 struct xfs_inode *cip = NULL; 366 struct xfs_inode *cip = NULL;
367 struct xfs_name name; 367 struct xfs_name name;
368 int error; 368 int error;
369 mode_t mode; 369 umode_t mode;
370 370
371 mode = S_IFLNK | 371 mode = S_IFLNK |
372 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); 372 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 34817adf4b9e..e2cc3568c299 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -760,38 +760,6 @@ xfs_log_item_init(
760 INIT_LIST_HEAD(&item->li_cil); 760 INIT_LIST_HEAD(&item->li_cil);
761} 761}
762 762
763/*
764 * Write region vectors to log. The write happens using the space reservation
765 * of the ticket (tic). It is not a requirement that all writes for a given
766 * transaction occur with one call to xfs_log_write(). However, it is important
767 * to note that the transaction reservation code makes an assumption about the
768 * number of log headers a transaction requires that may be violated if you
769 * don't pass all the transaction vectors in one call....
770 */
771int
772xfs_log_write(
773 struct xfs_mount *mp,
774 struct xfs_log_iovec reg[],
775 int nentries,
776 struct xlog_ticket *tic,
777 xfs_lsn_t *start_lsn)
778{
779 struct log *log = mp->m_log;
780 int error;
781 struct xfs_log_vec vec = {
782 .lv_niovecs = nentries,
783 .lv_iovecp = reg,
784 };
785
786 if (XLOG_FORCED_SHUTDOWN(log))
787 return XFS_ERROR(EIO);
788
789 error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
790 if (error)
791 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
792 return error;
793}
794
795void 763void
796xfs_log_move_tail(xfs_mount_t *mp, 764xfs_log_move_tail(xfs_mount_t *mp,
797 xfs_lsn_t tail_lsn) 765 xfs_lsn_t tail_lsn)
@@ -1685,7 +1653,7 @@ xlog_print_tic_res(
1685 }; 1653 };
1686 1654
1687 xfs_warn(mp, 1655 xfs_warn(mp,
1688 "xfs_log_write: reservation summary:\n" 1656 "xlog_write: reservation summary:\n"
1689 " trans type = %s (%u)\n" 1657 " trans type = %s (%u)\n"
1690 " unit res = %d bytes\n" 1658 " unit res = %d bytes\n"
1691 " current res = %d bytes\n" 1659 " current res = %d bytes\n"
@@ -1714,7 +1682,7 @@ xlog_print_tic_res(
1714 } 1682 }
1715 1683
1716 xfs_alert_tag(mp, XFS_PTAG_LOGRES, 1684 xfs_alert_tag(mp, XFS_PTAG_LOGRES,
1717 "xfs_log_write: reservation ran out. Need to up reservation"); 1685 "xlog_write: reservation ran out. Need to up reservation");
1718 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1686 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1719} 1687}
1720 1688
@@ -1968,23 +1936,21 @@ xlog_write(
1968 *start_lsn = 0; 1936 *start_lsn = 0;
1969 1937
1970 len = xlog_write_calc_vec_length(ticket, log_vector); 1938 len = xlog_write_calc_vec_length(ticket, log_vector);
1971 if (log->l_cilp) {
1972 /*
1973 * Region headers and bytes are already accounted for.
1974 * We only need to take into account start records and
1975 * split regions in this function.
1976 */
1977 if (ticket->t_flags & XLOG_TIC_INITED)
1978 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1979 1939
1980 /* 1940 /*
1981 * Commit record headers need to be accounted for. These 1941 * Region headers and bytes are already accounted for.
1982 * come in as separate writes so are easy to detect. 1942 * We only need to take into account start records and
1983 */ 1943 * split regions in this function.
1984 if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS)) 1944 */
1985 ticket->t_curr_res -= sizeof(xlog_op_header_t); 1945 if (ticket->t_flags & XLOG_TIC_INITED)
1986 } else 1946 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1987 ticket->t_curr_res -= len; 1947
1948 /*
1949 * Commit record headers need to be accounted for. These
1950 * come in as separate writes so are easy to detect.
1951 */
1952 if (flags & (XLOG_COMMIT_TRANS | XLOG_UNMOUNT_TRANS))
1953 ticket->t_curr_res -= sizeof(xlog_op_header_t);
1988 1954
1989 if (ticket->t_curr_res < 0) 1955 if (ticket->t_curr_res < 0)
1990 xlog_print_tic_res(log->l_mp, ticket); 1956 xlog_print_tic_res(log->l_mp, ticket);
@@ -2931,8 +2897,7 @@ _xfs_log_force(
2931 2897
2932 XFS_STATS_INC(xs_log_force); 2898 XFS_STATS_INC(xs_log_force);
2933 2899
2934 if (log->l_cilp) 2900 xlog_cil_force(log);
2935 xlog_cil_force(log);
2936 2901
2937 spin_lock(&log->l_icloglock); 2902 spin_lock(&log->l_icloglock);
2938 2903
@@ -3081,11 +3046,9 @@ _xfs_log_force_lsn(
3081 3046
3082 XFS_STATS_INC(xs_log_force); 3047 XFS_STATS_INC(xs_log_force);
3083 3048
3084 if (log->l_cilp) { 3049 lsn = xlog_cil_force_lsn(log, lsn);
3085 lsn = xlog_cil_force_lsn(log, lsn); 3050 if (lsn == NULLCOMMITLSN)
3086 if (lsn == NULLCOMMITLSN) 3051 return 0;
3087 return 0;
3088 }
3089 3052
3090try_again: 3053try_again:
3091 spin_lock(&log->l_icloglock); 3054 spin_lock(&log->l_icloglock);
@@ -3653,7 +3616,7 @@ xfs_log_force_umount(
3653 * completed transactions are flushed to disk with the xfs_log_force() 3616 * completed transactions are flushed to disk with the xfs_log_force()
3654 * call below. 3617 * call below.
3655 */ 3618 */
3656 if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG)) 3619 if (!logerror)
3657 xlog_cil_force(log); 3620 xlog_cil_force(log);
3658 3621
3659 /* 3622 /*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3f7bf451c034..2aee3b22d29c 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -174,11 +174,6 @@ int xfs_log_reserve(struct xfs_mount *mp,
174 __uint8_t clientid, 174 __uint8_t clientid,
175 uint flags, 175 uint flags,
176 uint t_type); 176 uint t_type);
177int xfs_log_write(struct xfs_mount *mp,
178 xfs_log_iovec_t region[],
179 int nentries,
180 struct xlog_ticket *ticket,
181 xfs_lsn_t *start_lsn);
182int xfs_log_unmount_write(struct xfs_mount *mp); 177int xfs_log_unmount_write(struct xfs_mount *mp);
183void xfs_log_unmount(struct xfs_mount *mp); 178void xfs_log_unmount(struct xfs_mount *mp);
184int xfs_log_force_umount(struct xfs_mount *mp, int logerror); 179int xfs_log_force_umount(struct xfs_mount *mp, int logerror);
@@ -189,8 +184,7 @@ void xlog_iodone(struct xfs_buf *);
189struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 184struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
190void xfs_log_ticket_put(struct xlog_ticket *ticket); 185void xfs_log_ticket_put(struct xlog_ticket *ticket);
191 186
192void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 187int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
193 struct xfs_log_vec *log_vector,
194 xfs_lsn_t *commit_lsn, int flags); 188 xfs_lsn_t *commit_lsn, int flags);
195bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 189bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
196 190
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index c7755d5a5fbe..d4fadbe8ac90 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -32,10 +32,7 @@
32#include "xfs_discard.h" 32#include "xfs_discard.h"
33 33
34/* 34/*
35 * Perform initial CIL structure initialisation. If the CIL is not 35 * Perform initial CIL structure initialisation.
36 * enabled in this filesystem, ensure the log->l_cilp is null so
37 * we can check this conditional to determine if we are doing delayed
38 * logging or not.
39 */ 36 */
40int 37int
41xlog_cil_init( 38xlog_cil_init(
@@ -44,10 +41,6 @@ xlog_cil_init(
44 struct xfs_cil *cil; 41 struct xfs_cil *cil;
45 struct xfs_cil_ctx *ctx; 42 struct xfs_cil_ctx *ctx;
46 43
47 log->l_cilp = NULL;
48 if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
49 return 0;
50
51 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL); 44 cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
52 if (!cil) 45 if (!cil)
53 return ENOMEM; 46 return ENOMEM;
@@ -80,9 +73,6 @@ void
80xlog_cil_destroy( 73xlog_cil_destroy(
81 struct log *log) 74 struct log *log)
82{ 75{
83 if (!log->l_cilp)
84 return;
85
86 if (log->l_cilp->xc_ctx) { 76 if (log->l_cilp->xc_ctx) {
87 if (log->l_cilp->xc_ctx->ticket) 77 if (log->l_cilp->xc_ctx->ticket)
88 xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); 78 xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
@@ -137,9 +127,6 @@ void
137xlog_cil_init_post_recovery( 127xlog_cil_init_post_recovery(
138 struct log *log) 128 struct log *log)
139{ 129{
140 if (!log->l_cilp)
141 return;
142
143 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); 130 log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
144 log->l_cilp->xc_ctx->sequence = 1; 131 log->l_cilp->xc_ctx->sequence = 1;
145 log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle, 132 log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
@@ -172,37 +159,73 @@ xlog_cil_init_post_recovery(
172 * format the regions into the iclog as though they are being formatted 159 * format the regions into the iclog as though they are being formatted
173 * directly out of the objects themselves. 160 * directly out of the objects themselves.
174 */ 161 */
175static void 162static struct xfs_log_vec *
176xlog_cil_format_items( 163xlog_cil_prepare_log_vecs(
177 struct log *log, 164 struct xfs_trans *tp)
178 struct xfs_log_vec *log_vector)
179{ 165{
180 struct xfs_log_vec *lv; 166 struct xfs_log_item_desc *lidp;
167 struct xfs_log_vec *lv = NULL;
168 struct xfs_log_vec *ret_lv = NULL;
181 169
182 ASSERT(log_vector); 170
183 for (lv = log_vector; lv; lv = lv->lv_next) { 171 /* Bail out if we didn't find a log item. */
172 if (list_empty(&tp->t_items)) {
173 ASSERT(0);
174 return NULL;
175 }
176
177 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
178 struct xfs_log_vec *new_lv;
184 void *ptr; 179 void *ptr;
185 int index; 180 int index;
186 int len = 0; 181 int len = 0;
182 uint niovecs;
183
184 /* Skip items which aren't dirty in this transaction. */
185 if (!(lidp->lid_flags & XFS_LID_DIRTY))
186 continue;
187
188 /* Skip items that do not have any vectors for writing */
189 niovecs = IOP_SIZE(lidp->lid_item);
190 if (!niovecs)
191 continue;
192
193 new_lv = kmem_zalloc(sizeof(*new_lv) +
194 niovecs * sizeof(struct xfs_log_iovec),
195 KM_SLEEP);
196
197 /* The allocated iovec region lies beyond the log vector. */
198 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
199 new_lv->lv_niovecs = niovecs;
200 new_lv->lv_item = lidp->lid_item;
187 201
188 /* build the vector array and calculate it's length */ 202 /* build the vector array and calculate it's length */
189 IOP_FORMAT(lv->lv_item, lv->lv_iovecp); 203 IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp);
190 for (index = 0; index < lv->lv_niovecs; index++) 204 for (index = 0; index < new_lv->lv_niovecs; index++)
191 len += lv->lv_iovecp[index].i_len; 205 len += new_lv->lv_iovecp[index].i_len;
192 206
193 lv->lv_buf_len = len; 207 new_lv->lv_buf_len = len;
194 lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); 208 new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len,
195 ptr = lv->lv_buf; 209 KM_SLEEP|KM_NOFS);
210 ptr = new_lv->lv_buf;
196 211
197 for (index = 0; index < lv->lv_niovecs; index++) { 212 for (index = 0; index < new_lv->lv_niovecs; index++) {
198 struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; 213 struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index];
199 214
200 memcpy(ptr, vec->i_addr, vec->i_len); 215 memcpy(ptr, vec->i_addr, vec->i_len);
201 vec->i_addr = ptr; 216 vec->i_addr = ptr;
202 ptr += vec->i_len; 217 ptr += vec->i_len;
203 } 218 }
204 ASSERT(ptr == lv->lv_buf + lv->lv_buf_len); 219 ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len);
220
221 if (!ret_lv)
222 ret_lv = new_lv;
223 else
224 lv->lv_next = new_lv;
225 lv = new_lv;
205 } 226 }
227
228 return ret_lv;
206} 229}
207 230
208/* 231/*
@@ -256,7 +279,7 @@ xfs_cil_prepare_item(
256 * Insert the log items into the CIL and calculate the difference in space 279 * Insert the log items into the CIL and calculate the difference in space
257 * consumed by the item. Add the space to the checkpoint ticket and calculate 280 * consumed by the item. Add the space to the checkpoint ticket and calculate
258 * if the change requires additional log metadata. If it does, take that space 281 * if the change requires additional log metadata. If it does, take that space
259 * as well. Remove the amount of space we addded to the checkpoint ticket from 282 * as well. Remove the amount of space we added to the checkpoint ticket from
260 * the current transaction ticket so that the accounting works out correctly. 283 * the current transaction ticket so that the accounting works out correctly.
261 */ 284 */
262static void 285static void
@@ -635,28 +658,30 @@ out_abort:
635 * background commit, returns without it held once background commits are 658 * background commit, returns without it held once background commits are
636 * allowed again. 659 * allowed again.
637 */ 660 */
638void 661int
639xfs_log_commit_cil( 662xfs_log_commit_cil(
640 struct xfs_mount *mp, 663 struct xfs_mount *mp,
641 struct xfs_trans *tp, 664 struct xfs_trans *tp,
642 struct xfs_log_vec *log_vector,
643 xfs_lsn_t *commit_lsn, 665 xfs_lsn_t *commit_lsn,
644 int flags) 666 int flags)
645{ 667{
646 struct log *log = mp->m_log; 668 struct log *log = mp->m_log;
647 int log_flags = 0; 669 int log_flags = 0;
648 int push = 0; 670 int push = 0;
671 struct xfs_log_vec *log_vector;
649 672
650 if (flags & XFS_TRANS_RELEASE_LOG_RES) 673 if (flags & XFS_TRANS_RELEASE_LOG_RES)
651 log_flags = XFS_LOG_REL_PERM_RESERV; 674 log_flags = XFS_LOG_REL_PERM_RESERV;
652 675
653 /* 676 /*
654 * do all the hard work of formatting items (including memory 677 * Do all the hard work of formatting items (including memory
655 * allocation) outside the CIL context lock. This prevents stalling CIL 678 * allocation) outside the CIL context lock. This prevents stalling CIL
656 * pushes when we are low on memory and a transaction commit spends a 679 * pushes when we are low on memory and a transaction commit spends a
657 * lot of time in memory reclaim. 680 * lot of time in memory reclaim.
658 */ 681 */
659 xlog_cil_format_items(log, log_vector); 682 log_vector = xlog_cil_prepare_log_vecs(tp);
683 if (!log_vector)
684 return ENOMEM;
660 685
661 /* lock out background commit */ 686 /* lock out background commit */
662 down_read(&log->l_cilp->xc_ctx_lock); 687 down_read(&log->l_cilp->xc_ctx_lock);
@@ -709,6 +734,7 @@ xfs_log_commit_cil(
709 */ 734 */
710 if (push) 735 if (push)
711 xlog_cil_push(log, 0); 736 xlog_cil_push(log, 0);
737 return 0;
712} 738}
713 739
714/* 740/*
@@ -786,8 +812,6 @@ xfs_log_item_in_current_chkpt(
786{ 812{
787 struct xfs_cil_ctx *ctx; 813 struct xfs_cil_ctx *ctx;
788 814
789 if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
790 return false;
791 if (list_empty(&lip->li_cil)) 815 if (list_empty(&lip->li_cil))
792 return false; 816 return false;
793 817
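
The xlog_cil_prepare_log_vecs() routine added in this file builds one log vector per dirty item and places the iovec array in the same allocation, immediately after the vector header ("the allocated iovec region lies beyond the log vector"). The layout trick is simply a header followed by a variable-length array carved out of a single allocation; here is a small illustrative C sketch with invented demo_* names rather than the XFS structures.

#include <stdlib.h>

struct demo_iovec { void *addr; int len; };

struct demo_logvec {
    struct demo_logvec *next;
    struct demo_iovec  *iovecp;     /* points just past this struct */
    int                 niovecs;
};

static struct demo_logvec *demo_logvec_alloc(int niovecs)
{
    struct demo_logvec *lv;

    /* one allocation holds the header and the trailing iovec array */
    lv = calloc(1, sizeof(*lv) + niovecs * sizeof(struct demo_iovec));
    if (!lv)
        return NULL;
    lv->iovecp = (struct demo_iovec *)&lv[1];
    lv->niovecs = niovecs;
    return lv;
}

int main(void)
{
    struct demo_logvec *lv = demo_logvec_alloc(3);

    if (!lv)
        return 1;
    lv->iovecp[0].len = 16;     /* fill in the embedded array */
    free(lv);                   /* header and array freed together */
    return 0;
}
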
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index bb24dac42a25..19f69e232509 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -219,7 +219,6 @@ typedef struct xfs_mount {
219#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops 219#define XFS_MOUNT_WSYNC (1ULL << 0) /* for nfs - all metadata ops
220 must be synchronous except 220 must be synchronous except
221 for space allocations */ 221 for space allocations */
222#define XFS_MOUNT_DELAYLOG (1ULL << 1) /* delayed logging is enabled */
223#define XFS_MOUNT_WAS_CLEAN (1ULL << 3) 222#define XFS_MOUNT_WAS_CLEAN (1ULL << 3)
224#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem 223#define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4) /* atomic stop of all filesystem
225 operations, typically for 224 operations, typically for
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 0bbb1a41998b..671f37eae1c7 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -154,12 +154,17 @@ STATIC void
154xfs_qm_destroy( 154xfs_qm_destroy(
155 struct xfs_qm *xqm) 155 struct xfs_qm *xqm)
156{ 156{
157 struct xfs_dquot *dqp, *n;
158 int hsize, i; 157 int hsize, i;
159 158
160 ASSERT(xqm != NULL); 159 ASSERT(xqm != NULL);
161 ASSERT(xqm->qm_nrefs == 0); 160 ASSERT(xqm->qm_nrefs == 0);
161
162 unregister_shrinker(&xfs_qm_shaker); 162 unregister_shrinker(&xfs_qm_shaker);
163
164 mutex_lock(&xqm->qm_dqfrlist_lock);
165 ASSERT(list_empty(&xqm->qm_dqfrlist));
166 mutex_unlock(&xqm->qm_dqfrlist_lock);
167
163 hsize = xqm->qm_dqhashmask + 1; 168 hsize = xqm->qm_dqhashmask + 1;
164 for (i = 0; i < hsize; i++) { 169 for (i = 0; i < hsize; i++) {
165 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 170 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
@@ -171,17 +176,6 @@ xfs_qm_destroy(
171 xqm->qm_grp_dqhtable = NULL; 176 xqm->qm_grp_dqhtable = NULL;
172 xqm->qm_dqhashmask = 0; 177 xqm->qm_dqhashmask = 0;
173 178
174 /* frlist cleanup */
175 mutex_lock(&xqm->qm_dqfrlist_lock);
176 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
177 xfs_dqlock(dqp);
178 list_del_init(&dqp->q_freelist);
179 xfs_Gqm->qm_dqfrlist_cnt--;
180 xfs_dqunlock(dqp);
181 xfs_qm_dqdestroy(dqp);
182 }
183 mutex_unlock(&xqm->qm_dqfrlist_lock);
184 mutex_destroy(&xqm->qm_dqfrlist_lock);
185 kmem_free(xqm); 179 kmem_free(xqm);
186} 180}
187 181
@@ -232,34 +226,10 @@ STATIC void
232xfs_qm_rele_quotafs_ref( 226xfs_qm_rele_quotafs_ref(
233 struct xfs_mount *mp) 227 struct xfs_mount *mp)
234{ 228{
235 xfs_dquot_t *dqp, *n;
236
237 ASSERT(xfs_Gqm); 229 ASSERT(xfs_Gqm);
238 ASSERT(xfs_Gqm->qm_nrefs > 0); 230 ASSERT(xfs_Gqm->qm_nrefs > 0);
239 231
240 /* 232 /*
241 * Go thru the freelist and destroy all inactive dquots.
242 */
243 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
244
245 list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
246 xfs_dqlock(dqp);
247 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
248 ASSERT(dqp->q_mount == NULL);
249 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
250 ASSERT(list_empty(&dqp->q_hashlist));
251 ASSERT(list_empty(&dqp->q_mplist));
252 list_del_init(&dqp->q_freelist);
253 xfs_Gqm->qm_dqfrlist_cnt--;
254 xfs_dqunlock(dqp);
255 xfs_qm_dqdestroy(dqp);
256 } else {
257 xfs_dqunlock(dqp);
258 }
259 }
260 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
261
262 /*
263 * Destroy the entire XQM. If somebody mounts with quotaon, this'll 233 * Destroy the entire XQM. If somebody mounts with quotaon, this'll
264 * be restarted. 234 * be restarted.
265 */ 235 */
@@ -415,8 +385,7 @@ xfs_qm_unmount_quotas(
415 */ 385 */
416STATIC int 386STATIC int
417xfs_qm_dqflush_all( 387xfs_qm_dqflush_all(
418 struct xfs_mount *mp, 388 struct xfs_mount *mp)
419 int sync_mode)
420{ 389{
421 struct xfs_quotainfo *q = mp->m_quotainfo; 390 struct xfs_quotainfo *q = mp->m_quotainfo;
422 int recl; 391 int recl;
@@ -429,7 +398,8 @@ again:
429 mutex_lock(&q->qi_dqlist_lock); 398 mutex_lock(&q->qi_dqlist_lock);
430 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { 399 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
431 xfs_dqlock(dqp); 400 xfs_dqlock(dqp);
432 if (! XFS_DQ_IS_DIRTY(dqp)) { 401 if ((dqp->dq_flags & XFS_DQ_FREEING) ||
402 !XFS_DQ_IS_DIRTY(dqp)) {
433 xfs_dqunlock(dqp); 403 xfs_dqunlock(dqp);
434 continue; 404 continue;
435 } 405 }
@@ -444,14 +414,14 @@ again:
444 * out immediately. We'll be able to acquire 414 * out immediately. We'll be able to acquire
445 * the flush lock when the I/O completes. 415 * the flush lock when the I/O completes.
446 */ 416 */
447 xfs_qm_dqflock_pushbuf_wait(dqp); 417 xfs_dqflock_pushbuf_wait(dqp);
448 } 418 }
449 /* 419 /*
450 * Let go of the mplist lock. We don't want to hold it 420 * Let go of the mplist lock. We don't want to hold it
451 * across a disk write. 421 * across a disk write.
452 */ 422 */
453 mutex_unlock(&q->qi_dqlist_lock); 423 mutex_unlock(&q->qi_dqlist_lock);
454 error = xfs_qm_dqflush(dqp, sync_mode); 424 error = xfs_qm_dqflush(dqp, 0);
455 xfs_dqunlock(dqp); 425 xfs_dqunlock(dqp);
456 if (error) 426 if (error)
457 return error; 427 return error;
@@ -468,6 +438,7 @@ again:
468 /* return ! busy */ 438 /* return ! busy */
469 return 0; 439 return 0;
470} 440}
441
471/* 442/*
472 * Release the group dquot pointers the user dquots may be 443 * Release the group dquot pointers the user dquots may be
473 * carrying around as a hint. mplist is locked on entry and exit. 444 * carrying around as a hint. mplist is locked on entry and exit.
@@ -478,31 +449,26 @@ xfs_qm_detach_gdquots(
478{ 449{
479 struct xfs_quotainfo *q = mp->m_quotainfo; 450 struct xfs_quotainfo *q = mp->m_quotainfo;
480 struct xfs_dquot *dqp, *gdqp; 451 struct xfs_dquot *dqp, *gdqp;
481 int nrecl;
482 452
483 again: 453 again:
484 ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); 454 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
485 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { 455 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
486 xfs_dqlock(dqp); 456 xfs_dqlock(dqp);
487 if ((gdqp = dqp->q_gdquot)) { 457 if (dqp->dq_flags & XFS_DQ_FREEING) {
488 xfs_dqlock(gdqp); 458 xfs_dqunlock(dqp);
489 dqp->q_gdquot = NULL;
490 }
491 xfs_dqunlock(dqp);
492
493 if (gdqp) {
494 /*
495 * Can't hold the mplist lock across a dqput.
496 * XXXmust convert to marker based iterations here.
497 */
498 nrecl = q->qi_dqreclaims;
499 mutex_unlock(&q->qi_dqlist_lock); 459 mutex_unlock(&q->qi_dqlist_lock);
500 xfs_qm_dqput(gdqp); 460 delay(1);
501
502 mutex_lock(&q->qi_dqlist_lock); 461 mutex_lock(&q->qi_dqlist_lock);
503 if (nrecl != q->qi_dqreclaims) 462 goto again;
504 goto again;
505 } 463 }
464
465 gdqp = dqp->q_gdquot;
466 if (gdqp)
467 dqp->q_gdquot = NULL;
468 xfs_dqunlock(dqp);
469
470 if (gdqp)
471 xfs_qm_dqrele(gdqp);
506 } 472 }
507} 473}
508 474
@@ -520,8 +486,8 @@ xfs_qm_dqpurge_int(
520 struct xfs_quotainfo *q = mp->m_quotainfo; 486 struct xfs_quotainfo *q = mp->m_quotainfo;
521 struct xfs_dquot *dqp, *n; 487 struct xfs_dquot *dqp, *n;
522 uint dqtype; 488 uint dqtype;
523 int nrecl; 489 int nmisses = 0;
524 int nmisses; 490 LIST_HEAD (dispose_list);
525 491
526 if (!q) 492 if (!q)
527 return 0; 493 return 0;
@@ -540,47 +506,26 @@ xfs_qm_dqpurge_int(
540 */ 506 */
541 xfs_qm_detach_gdquots(mp); 507 xfs_qm_detach_gdquots(mp);
542 508
543 again:
544 nmisses = 0;
545 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
546 /* 509 /*
547 * Try to get rid of all of the unwanted dquots. The idea is to 510 * Try to get rid of all of the unwanted dquots.
548 * get them off mplist and hashlist, but leave them on freelist.
549 */ 511 */
550 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { 512 list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
551 /* 513 xfs_dqlock(dqp);
552 * It's OK to look at the type without taking dqlock here. 514 if ((dqp->dq_flags & dqtype) != 0 &&
553 * We're holding the mplist lock here, and that's needed for 515 !(dqp->dq_flags & XFS_DQ_FREEING)) {
554 * a dqreclaim. 516 if (dqp->q_nrefs == 0) {
555 */ 517 dqp->dq_flags |= XFS_DQ_FREEING;
556 if ((dqp->dq_flags & dqtype) == 0) 518 list_move_tail(&dqp->q_mplist, &dispose_list);
557 continue; 519 } else
558 520 nmisses++;
559 if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
560 nrecl = q->qi_dqreclaims;
561 mutex_unlock(&q->qi_dqlist_lock);
562 mutex_lock(&dqp->q_hash->qh_lock);
563 mutex_lock(&q->qi_dqlist_lock);
564
565 /*
566 * XXXTheoretically, we can get into a very long
567 * ping pong game here.
568 * No one can be adding dquots to the mplist at
569 * this point, but somebody might be taking things off.
570 */
571 if (nrecl != q->qi_dqreclaims) {
572 mutex_unlock(&dqp->q_hash->qh_lock);
573 goto again;
574 }
575 } 521 }
576 522 xfs_dqunlock(dqp);
577 /*
578 * Take the dquot off the mplist and hashlist. It may remain on
579 * freelist in INACTIVE state.
580 */
581 nmisses += xfs_qm_dqpurge(dqp);
582 } 523 }
583 mutex_unlock(&q->qi_dqlist_lock); 524 mutex_unlock(&q->qi_dqlist_lock);
525
526 list_for_each_entry_safe(dqp, n, &dispose_list, q_mplist)
527 xfs_qm_dqpurge(dqp);
528
584 return nmisses; 529 return nmisses;
585} 530}
586 531
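The purge path above now gathers reclaimable dquots on a private dispose_list while the mount list lock is held, and only calls xfs_qm_dqpurge() after dropping it. A minimal userspace sketch of that dispose-list pattern follows; per-entry locking is omitted for brevity, and entry, purge_entry and purge_unused are illustrative names rather than XFS interfaces:

    /* Sketch: collect eligible entries on a private list under the lock,
     * then do the expensive teardown after dropping it. */
    #include <pthread.h>
    #include <stddef.h>

    struct entry {
            struct entry    *next;
            int             refs;
            int             freeing;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct entry *active;            /* analogue of qi_dqlist */

    static void purge_entry(struct entry *e)
    {
            (void)e;        /* slow teardown work, done without list_lock */
    }

    static int purge_unused(void)
    {
            struct entry *e, **pp, *dispose = NULL;
            int busy = 0;

            pthread_mutex_lock(&list_lock);
            for (pp = &active; (e = *pp) != NULL; ) {
                    if (e->refs == 0 && !e->freeing) {
                            e->freeing = 1;         /* fence off lookups */
                            *pp = e->next;          /* unlink from active list */
                            e->next = dispose;      /* park on private list */
                            dispose = e;
                            continue;
                    }
                    if (e->refs)
                            busy++;                 /* analogue of nmisses */
                    pp = &e->next;
            }
            pthread_mutex_unlock(&list_lock);

            while ((e = dispose) != NULL) {
                    dispose = e->next;
                    purge_entry(e);
            }
            return busy;
    }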
@@ -648,12 +593,9 @@ xfs_qm_dqattach_one(
648 */ 593 */
649 dqp = udqhint->q_gdquot; 594 dqp = udqhint->q_gdquot;
650 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { 595 if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
651 xfs_dqlock(dqp);
652 XFS_DQHOLD(dqp);
653 ASSERT(*IO_idqpp == NULL); 596 ASSERT(*IO_idqpp == NULL);
654 *IO_idqpp = dqp;
655 597
656 xfs_dqunlock(dqp); 598 *IO_idqpp = xfs_qm_dqhold(dqp);
657 xfs_dqunlock(udqhint); 599 xfs_dqunlock(udqhint);
658 return 0; 600 return 0;
659 } 601 }
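The open-coded xfs_dqlock()/XFS_DQHOLD()/xfs_dqunlock() triplets throughout this patch are folded into xfs_qm_dqhold(), whose body lives elsewhere in the series. Assuming it behaves like a conventional hold helper, that is, bump the reference count under the object lock and return the pointer so the hold can be taken inline, a sketch of that shape looks like this (obj_hold and ref_obj are illustrative, not the XFS implementation):

    #include <pthread.h>

    struct ref_obj {
            pthread_mutex_t lock;
            unsigned int    nrefs;
    };

    /* Take an extra reference and hand the pointer back so callers can
     * write `dst = obj_hold(src);` in a single expression. */
    static struct ref_obj *obj_hold(struct ref_obj *obj)
    {
            pthread_mutex_lock(&obj->lock);
            obj->nrefs++;
            pthread_mutex_unlock(&obj->lock);
            return obj;
    }

With a helper of this shape, assignments such as *IO_idqpp = xfs_qm_dqhold(dqp) above replace four lines of open-coded locking at every call site.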
@@ -693,11 +635,7 @@ xfs_qm_dqattach_one(
693 635
694/* 636/*
695 * Given a udquot and gdquot, attach a ptr to the group dquot in the 637 * Given a udquot and gdquot, attach a ptr to the group dquot in the
696 * udquot as a hint for future lookups. The idea sounds simple, but the 638 * udquot as a hint for future lookups.
697 * execution isn't, because the udquot might have a group dquot attached
698 * already and getting rid of that gets us into lock ordering constraints.
699 * The process is complicated more by the fact that the dquots may or may not
700 * be locked on entry.
701 */ 639 */
702STATIC void 640STATIC void
703xfs_qm_dqattach_grouphint( 641xfs_qm_dqattach_grouphint(
@@ -708,45 +646,17 @@ xfs_qm_dqattach_grouphint(
708 646
709 xfs_dqlock(udq); 647 xfs_dqlock(udq);
710 648
711 if ((tmp = udq->q_gdquot)) { 649 tmp = udq->q_gdquot;
712 if (tmp == gdq) { 650 if (tmp) {
713 xfs_dqunlock(udq); 651 if (tmp == gdq)
714 return; 652 goto done;
715 }
716 653
717 udq->q_gdquot = NULL; 654 udq->q_gdquot = NULL;
718 /*
719 * We can't keep any dqlocks when calling dqrele,
720 * because the freelist lock comes before dqlocks.
721 */
722 xfs_dqunlock(udq);
723 /*
724 * we took a hard reference once upon a time in dqget,
725 * so give it back when the udquot no longer points at it
726 * dqput() does the unlocking of the dquot.
727 */
728 xfs_qm_dqrele(tmp); 655 xfs_qm_dqrele(tmp);
729
730 xfs_dqlock(udq);
731 xfs_dqlock(gdq);
732
733 } else {
734 ASSERT(XFS_DQ_IS_LOCKED(udq));
735 xfs_dqlock(gdq);
736 }
737
738 ASSERT(XFS_DQ_IS_LOCKED(udq));
739 ASSERT(XFS_DQ_IS_LOCKED(gdq));
740 /*
741 * Somebody could have attached a gdquot here,
742 * when we dropped the uqlock. If so, just do nothing.
743 */
744 if (udq->q_gdquot == NULL) {
745 XFS_DQHOLD(gdq);
746 udq->q_gdquot = gdq;
747 } 656 }
748 657
749 xfs_dqunlock(gdq); 658 udq->q_gdquot = xfs_qm_dqhold(gdq);
659done:
750 xfs_dqunlock(udq); 660 xfs_dqunlock(udq);
751} 661}
752 662
@@ -813,17 +723,13 @@ xfs_qm_dqattach_locked(
813 ASSERT(ip->i_gdquot); 723 ASSERT(ip->i_gdquot);
814 724
815 /* 725 /*
816 * We may or may not have the i_udquot locked at this point, 726 * We do not have i_udquot locked at this point, but this check
817 * but this check is OK since we don't depend on the i_gdquot to 727 * is OK since we don't depend on the i_gdquot to be accurate
818 * be accurate 100% all the time. It is just a hint, and this 728 * 100% all the time. It is just a hint, and this will
819 * will succeed in general. 729 * succeed in general.
820 */
821 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
822 goto done;
823 /*
824 * Attach i_gdquot to the gdquot hint inside the i_udquot.
825 */ 730 */
826 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); 731 if (ip->i_udquot->q_gdquot != ip->i_gdquot)
732 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
827 } 733 }
828 734
829 done: 735 done:
@@ -879,100 +785,6 @@ xfs_qm_dqdetach(
879 } 785 }
880} 786}
881 787
882int
883xfs_qm_sync(
884 struct xfs_mount *mp,
885 int flags)
886{
887 struct xfs_quotainfo *q = mp->m_quotainfo;
888 int recl, restarts;
889 struct xfs_dquot *dqp;
890 int error;
891
892 if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
893 return 0;
894
895 restarts = 0;
896
897 again:
898 mutex_lock(&q->qi_dqlist_lock);
899 /*
900 * dqpurge_all() also takes the mplist lock and iterate thru all dquots
901 * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
902 * when we have the mplist lock, we know that dquots will be consistent
903 * as long as we have it locked.
904 */
905 if (!XFS_IS_QUOTA_ON(mp)) {
906 mutex_unlock(&q->qi_dqlist_lock);
907 return 0;
908 }
909 ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
910 list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
911 /*
912 * If this is vfs_sync calling, then skip the dquots that
913 * don't 'seem' to be dirty. ie. don't acquire dqlock.
914 * This is very similar to what xfs_sync does with inodes.
915 */
916 if (flags & SYNC_TRYLOCK) {
917 if (!XFS_DQ_IS_DIRTY(dqp))
918 continue;
919 if (!xfs_qm_dqlock_nowait(dqp))
920 continue;
921 } else {
922 xfs_dqlock(dqp);
923 }
924
925 /*
926 * Now, find out for sure if this dquot is dirty or not.
927 */
928 if (! XFS_DQ_IS_DIRTY(dqp)) {
929 xfs_dqunlock(dqp);
930 continue;
931 }
932
933 /* XXX a sentinel would be better */
934 recl = q->qi_dqreclaims;
935 if (!xfs_dqflock_nowait(dqp)) {
936 if (flags & SYNC_TRYLOCK) {
937 xfs_dqunlock(dqp);
938 continue;
939 }
940 /*
 941 * If we can't grab the flush lock and the caller
 942 * really wanted us to give this our best shot, then
943 * see if we can give a push to the buffer before we wait
944 * on the flush lock. At this point, we know that
945 * even though the dquot is being flushed,
946 * it has (new) dirty data.
947 */
948 xfs_qm_dqflock_pushbuf_wait(dqp);
949 }
950 /*
951 * Let go of the mplist lock. We don't want to hold it
952 * across a disk write
953 */
954 mutex_unlock(&q->qi_dqlist_lock);
955 error = xfs_qm_dqflush(dqp, flags);
956 xfs_dqunlock(dqp);
957 if (error && XFS_FORCED_SHUTDOWN(mp))
958 return 0; /* Need to prevent umount failure */
959 else if (error)
960 return error;
961
962 mutex_lock(&q->qi_dqlist_lock);
963 if (recl != q->qi_dqreclaims) {
964 if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
965 break;
966
967 mutex_unlock(&q->qi_dqlist_lock);
968 goto again;
969 }
970 }
971
972 mutex_unlock(&q->qi_dqlist_lock);
973 return 0;
974}
975
976/* 788/*
977 * The hash chains and the mplist use the same xfs_dqhash structure as 789 * The hash chains and the mplist use the same xfs_dqhash structure as
978 * their list head, but we can take the mplist qh_lock and one of the 790 * their list head, but we can take the mplist qh_lock and one of the
@@ -1034,18 +846,21 @@ xfs_qm_init_quotainfo(
1034 /* 846 /*
1035 * We try to get the limits from the superuser's limits fields. 847 * We try to get the limits from the superuser's limits fields.
1036 * This is quite hacky, but it is standard quota practice. 848 * This is quite hacky, but it is standard quota practice.
849 *
1037 * We look at the USR dquot with id == 0 first, but if user quotas 850 * We look at the USR dquot with id == 0 first, but if user quotas
1038 * are not enabled we go to the GRP dquot with id == 0. 851 * are not enabled we go to the GRP dquot with id == 0.
1039 * We don't really care to keep separate default limits for user 852 * We don't really care to keep separate default limits for user
1040 * and group quotas, at least not at this point. 853 * and group quotas, at least not at this point.
854 *
855 * Since we may not have done a quotacheck by this point, just read
856 * the dquot without attaching it to any hashtables or lists.
1041 */ 857 */
1042 error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, 858 error = xfs_qm_dqread(mp, 0,
1043 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 859 XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
1044 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : 860 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1045 XFS_DQ_PROJ), 861 XFS_DQ_PROJ),
1046 XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, 862 XFS_QMOPT_DOWARN, &dqp);
1047 &dqp); 863 if (!error) {
1048 if (! error) {
1049 xfs_disk_dquot_t *ddqp = &dqp->q_core; 864 xfs_disk_dquot_t *ddqp = &dqp->q_core;
1050 865
1051 /* 866 /*
@@ -1072,11 +887,6 @@ xfs_qm_init_quotainfo(
1072 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); 887 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1073 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); 888 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1074 889
1075 /*
1076 * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1077 * we don't want this dquot cached. We haven't done a
1078 * quotacheck yet, and quotacheck doesn't like incore dquots.
1079 */
1080 xfs_qm_dqdestroy(dqp); 890 xfs_qm_dqdestroy(dqp);
1081 } else { 891 } else {
1082 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; 892 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -1661,7 +1471,7 @@ xfs_qm_quotacheck(
1661 * successfully. 1471 * successfully.
1662 */ 1472 */
1663 if (!error) 1473 if (!error)
1664 error = xfs_qm_dqflush_all(mp, 0); 1474 error = xfs_qm_dqflush_all(mp);
1665 1475
1666 /* 1476 /*
1667 * We can get this error if we couldn't do a dquot allocation inside 1477 * We can get this error if we couldn't do a dquot allocation inside
@@ -1793,59 +1603,33 @@ xfs_qm_init_quotainos(
1793 1603
1794 1604
1795/* 1605/*
1796 * Just pop the least recently used dquot off the freelist and 1606 * Pop the least recently used dquot off the freelist and recycle it.
1797 * recycle it. The returned dquot is locked.
1798 */ 1607 */
1799STATIC xfs_dquot_t * 1608STATIC struct xfs_dquot *
1800xfs_qm_dqreclaim_one(void) 1609xfs_qm_dqreclaim_one(void)
1801{ 1610{
1802 xfs_dquot_t *dqpout; 1611 struct xfs_dquot *dqp;
1803 xfs_dquot_t *dqp; 1612 int restarts = 0;
1804 int restarts;
1805 int startagain;
1806
1807 restarts = 0;
1808 dqpout = NULL;
1809 1613
1810 /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
1811again:
1812 startagain = 0;
1813 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); 1614 mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
1814 1615restart:
1815 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { 1616 list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
1816 struct xfs_mount *mp = dqp->q_mount; 1617 struct xfs_mount *mp = dqp->q_mount;
1817 xfs_dqlock(dqp); 1618
1619 if (!xfs_dqlock_nowait(dqp))
1620 continue;
1818 1621
1819 /* 1622 /*
1820 * We are racing with dqlookup here. Naturally we don't 1623 * This dquot has already been grabbed by dqlookup.
1821 * want to reclaim a dquot that lookup wants. We release the 1624 * Remove it from the freelist and try again.
1822 * freelist lock and start over, so that lookup will grab
1823 * both the dquot and the freelistlock.
1824 */ 1625 */
1825 if (dqp->dq_flags & XFS_DQ_WANT) { 1626 if (dqp->q_nrefs) {
1826 ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
1827
1828 trace_xfs_dqreclaim_want(dqp); 1627 trace_xfs_dqreclaim_want(dqp);
1829 XQM_STATS_INC(xqmstats.xs_qm_dqwants); 1628 XQM_STATS_INC(xqmstats.xs_qm_dqwants);
1830 restarts++;
1831 startagain = 1;
1832 goto dqunlock;
1833 }
1834 1629
1835 /*
1836 * If the dquot is inactive, we are assured that it is
1837 * not on the mplist or the hashlist, and that makes our
1838 * life easier.
1839 */
1840 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
1841 ASSERT(mp == NULL);
1842 ASSERT(! XFS_DQ_IS_DIRTY(dqp));
1843 ASSERT(list_empty(&dqp->q_hashlist));
1844 ASSERT(list_empty(&dqp->q_mplist));
1845 list_del_init(&dqp->q_freelist); 1630 list_del_init(&dqp->q_freelist);
1846 xfs_Gqm->qm_dqfrlist_cnt--; 1631 xfs_Gqm->qm_dqfrlist_cnt--;
1847 dqpout = dqp; 1632 restarts++;
1848 XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
1849 goto dqunlock; 1633 goto dqunlock;
1850 } 1634 }
1851 1635
@@ -1874,64 +1658,49 @@ again:
1874 * We flush it delayed write, so don't bother 1658 * We flush it delayed write, so don't bother
1875 * releasing the freelist lock. 1659 * releasing the freelist lock.
1876 */ 1660 */
1877 error = xfs_qm_dqflush(dqp, 0); 1661 error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
1878 if (error) { 1662 if (error) {
1879 xfs_warn(mp, "%s: dquot %p flush failed", 1663 xfs_warn(mp, "%s: dquot %p flush failed",
1880 __func__, dqp); 1664 __func__, dqp);
1881 } 1665 }
1882 goto dqunlock; 1666 goto dqunlock;
1883 } 1667 }
1668 xfs_dqfunlock(dqp);
1884 1669
1885 /* 1670 /*
1886 * We're trying to get the hashlock out of order. This races 1671 * Prevent lookup now that we are going to reclaim the dquot.
1887 * with dqlookup; so, we giveup and goto the next dquot if 1672 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
1888 * we couldn't get the hashlock. This way, we won't starve 1673 * thus we can drop the lock now.
1889 * a dqlookup process that holds the hashlock that is
1890 * waiting for the freelist lock.
1891 */ 1674 */
1892 if (!mutex_trylock(&dqp->q_hash->qh_lock)) { 1675 dqp->dq_flags |= XFS_DQ_FREEING;
1893 restarts++; 1676 xfs_dqunlock(dqp);
1894 goto dqfunlock;
1895 }
1896 1677
1897 /* 1678 mutex_lock(&dqp->q_hash->qh_lock);
1898 * This races with dquot allocation code as well as dqflush_all 1679 list_del_init(&dqp->q_hashlist);
1899 * and reclaim code. So, if we failed to grab the mplist lock, 1680 dqp->q_hash->qh_version++;
1900 * giveup everything and start over. 1681 mutex_unlock(&dqp->q_hash->qh_lock);
1901 */
1902 if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
1903 restarts++;
1904 startagain = 1;
1905 goto qhunlock;
1906 }
1907 1682
1908 ASSERT(dqp->q_nrefs == 0); 1683 mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
1909 list_del_init(&dqp->q_mplist); 1684 list_del_init(&dqp->q_mplist);
1910 mp->m_quotainfo->qi_dquots--; 1685 mp->m_quotainfo->qi_dquots--;
1911 mp->m_quotainfo->qi_dqreclaims++; 1686 mp->m_quotainfo->qi_dqreclaims++;
1912 list_del_init(&dqp->q_hashlist); 1687 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
1913 dqp->q_hash->qh_version++; 1688
1689 ASSERT(dqp->q_nrefs == 0);
1914 list_del_init(&dqp->q_freelist); 1690 list_del_init(&dqp->q_freelist);
1915 xfs_Gqm->qm_dqfrlist_cnt--; 1691 xfs_Gqm->qm_dqfrlist_cnt--;
1916 dqpout = dqp; 1692
1917 mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); 1693 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1918qhunlock: 1694 return dqp;
1919 mutex_unlock(&dqp->q_hash->qh_lock);
1920dqfunlock:
1921 xfs_dqfunlock(dqp);
1922dqunlock: 1695dqunlock:
1923 xfs_dqunlock(dqp); 1696 xfs_dqunlock(dqp);
1924 if (dqpout)
1925 break;
1926 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) 1697 if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
1927 break; 1698 break;
1928 if (startagain) { 1699 goto restart;
1929 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1930 goto again;
1931 }
1932 } 1700 }
1701
1933 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); 1702 mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
1934 return dqpout; 1703 return NULL;
1935} 1704}
1936 1705
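The rewritten xfs_qm_dqreclaim_one() above drops the XFS_DQ_WANT/XFS_DQ_INACTIVE handshake entirely: it walks the freelist with xfs_dqlock_nowait(), skips anything it cannot lock, removes still-referenced entries from the freelist, and claims the first idle dquot by marking it XFS_DQ_FREEING. A minimal userspace analogue of that trylock scan, with invented names (obj, reclaim_one, OBJ_FREEING) and pthreads standing in for the XFS locks:

    #include <pthread.h>
    #include <stddef.h>

    #define MAX_RESTARTS    4
    #define OBJ_FREEING     0x1

    struct obj {
            struct obj      *next;
            pthread_mutex_t lock;
            unsigned int    nrefs;
            int             flags;
    };

    static pthread_mutex_t freelist_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct obj *freelist;

    static struct obj *reclaim_one(void)
    {
            struct obj *o, **pp;
            int restarts = 0;

            pthread_mutex_lock(&freelist_lock);
    restart:
            for (pp = &freelist; (o = *pp) != NULL; pp = &o->next) {
                    if (pthread_mutex_trylock(&o->lock))
                            continue;               /* busy, try the next one */

                    if (o->nrefs) {
                            /* grabbed by a lookup: drop it from the free list */
                            *pp = o->next;
                            pthread_mutex_unlock(&o->lock);
                            if (++restarts >= MAX_RESTARTS)
                                    break;
                            goto restart;
                    }

                    o->flags |= OBJ_FREEING;        /* fence off future lookups */
                    *pp = o->next;                  /* take it off the free list */
                    pthread_mutex_unlock(&o->lock);
                    pthread_mutex_unlock(&freelist_lock);
                    return o;
            }
            pthread_mutex_unlock(&freelist_lock);
            return NULL;
    }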
1937/* 1706/*
@@ -2151,10 +1920,7 @@ xfs_qm_vop_dqalloc(
2151 * this to caller 1920 * this to caller
2152 */ 1921 */
2153 ASSERT(ip->i_udquot); 1922 ASSERT(ip->i_udquot);
2154 uq = ip->i_udquot; 1923 uq = xfs_qm_dqhold(ip->i_udquot);
2155 xfs_dqlock(uq);
2156 XFS_DQHOLD(uq);
2157 xfs_dqunlock(uq);
2158 } 1924 }
2159 } 1925 }
2160 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { 1926 if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
@@ -2175,10 +1941,7 @@ xfs_qm_vop_dqalloc(
2175 xfs_ilock(ip, lockflags); 1941 xfs_ilock(ip, lockflags);
2176 } else { 1942 } else {
2177 ASSERT(ip->i_gdquot); 1943 ASSERT(ip->i_gdquot);
2178 gq = ip->i_gdquot; 1944 gq = xfs_qm_dqhold(ip->i_gdquot);
2179 xfs_dqlock(gq);
2180 XFS_DQHOLD(gq);
2181 xfs_dqunlock(gq);
2182 } 1945 }
2183 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { 1946 } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2184 if (xfs_get_projid(ip) != prid) { 1947 if (xfs_get_projid(ip) != prid) {
@@ -2198,10 +1961,7 @@ xfs_qm_vop_dqalloc(
2198 xfs_ilock(ip, lockflags); 1961 xfs_ilock(ip, lockflags);
2199 } else { 1962 } else {
2200 ASSERT(ip->i_gdquot); 1963 ASSERT(ip->i_gdquot);
2201 gq = ip->i_gdquot; 1964 gq = xfs_qm_dqhold(ip->i_gdquot);
2202 xfs_dqlock(gq);
2203 XFS_DQHOLD(gq);
2204 xfs_dqunlock(gq);
2205 } 1965 }
2206 } 1966 }
2207 if (uq) 1967 if (uq)
@@ -2251,14 +2011,10 @@ xfs_qm_vop_chown(
2251 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); 2011 xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2252 2012
2253 /* 2013 /*
2254 * Take an extra reference, because the inode 2014 * Take an extra reference, because the inode is going to keep
2255 * is going to keep this dquot pointer even 2015 * this dquot pointer even after the trans_commit.
2256 * after the trans_commit.
2257 */ 2016 */
2258 xfs_dqlock(newdq); 2017 *IO_olddq = xfs_qm_dqhold(newdq);
2259 XFS_DQHOLD(newdq);
2260 xfs_dqunlock(newdq);
2261 *IO_olddq = newdq;
2262 2018
2263 return prevdq; 2019 return prevdq;
2264} 2020}
@@ -2390,25 +2146,21 @@ xfs_qm_vop_create_dqattach(
2390 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 2146 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2391 2147
2392 if (udqp) { 2148 if (udqp) {
2393 xfs_dqlock(udqp);
2394 XFS_DQHOLD(udqp);
2395 xfs_dqunlock(udqp);
2396 ASSERT(ip->i_udquot == NULL); 2149 ASSERT(ip->i_udquot == NULL);
2397 ip->i_udquot = udqp;
2398 ASSERT(XFS_IS_UQUOTA_ON(mp)); 2150 ASSERT(XFS_IS_UQUOTA_ON(mp));
2399 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); 2151 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2152
2153 ip->i_udquot = xfs_qm_dqhold(udqp);
2400 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); 2154 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2401 } 2155 }
2402 if (gdqp) { 2156 if (gdqp) {
2403 xfs_dqlock(gdqp);
2404 XFS_DQHOLD(gdqp);
2405 xfs_dqunlock(gdqp);
2406 ASSERT(ip->i_gdquot == NULL); 2157 ASSERT(ip->i_gdquot == NULL);
2407 ip->i_gdquot = gdqp;
2408 ASSERT(XFS_IS_OQUOTA_ON(mp)); 2158 ASSERT(XFS_IS_OQUOTA_ON(mp));
2409 ASSERT((XFS_IS_GQUOTA_ON(mp) ? 2159 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
2410 ip->i_d.di_gid : xfs_get_projid(ip)) == 2160 ip->i_d.di_gid : xfs_get_projid(ip)) ==
2411 be32_to_cpu(gdqp->q_core.d_id)); 2161 be32_to_cpu(gdqp->q_core.d_id));
2162
2163 ip->i_gdquot = xfs_qm_dqhold(gdqp);
2412 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); 2164 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2413 } 2165 }
2414} 2166}
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 43b9abe1052c..9b4f3adefbc5 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -33,12 +33,6 @@ extern kmem_zone_t *qm_dqzone;
33extern kmem_zone_t *qm_dqtrxzone; 33extern kmem_zone_t *qm_dqtrxzone;
34 34
35/* 35/*
36 * Used in xfs_qm_sync called by xfs_sync to count the max times that it can
37 * iterate over the mountpt's dquot list in one call.
38 */
39#define XFS_QM_SYNC_MAX_RESTARTS 7
40
41/*
42 * Ditto, for xfs_qm_dqreclaim_one. 36 * Ditto, for xfs_qm_dqreclaim_one.
43 */ 37 */
44#define XFS_QM_RECLAIM_MAX_RESTARTS 4 38#define XFS_QM_RECLAIM_MAX_RESTARTS 4
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index a595f29567fe..8a0807e0f979 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,8 +87,7 @@ typedef struct xfs_dqblk {
87#define XFS_DQ_PROJ 0x0002 /* project quota */ 87#define XFS_DQ_PROJ 0x0002 /* project quota */
88#define XFS_DQ_GROUP 0x0004 /* a group quota */ 88#define XFS_DQ_GROUP 0x0004 /* a group quota */
89#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ 89#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
 90#define XFS_DQ_WANT 0x0010 /* for lookup/reclaim race */ 90#define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */
91#define XFS_DQ_INACTIVE 0x0020 /* dq off mplist & hashlist */
92 91
93#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) 92#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
94 93
@@ -97,8 +96,7 @@ typedef struct xfs_dqblk {
97 { XFS_DQ_PROJ, "PROJ" }, \ 96 { XFS_DQ_PROJ, "PROJ" }, \
98 { XFS_DQ_GROUP, "GROUP" }, \ 97 { XFS_DQ_GROUP, "GROUP" }, \
99 { XFS_DQ_DIRTY, "DIRTY" }, \ 98 { XFS_DQ_DIRTY, "DIRTY" }, \
100 { XFS_DQ_WANT, "WANT" }, \ 99 { XFS_DQ_FREEING, "FREEING" }
101 { XFS_DQ_INACTIVE, "INACTIVE" }
102 100
103/* 101/*
104 * In the worst case, when both user and group quotas are on, 102 * In the worst case, when both user and group quotas are on,
@@ -199,7 +197,6 @@ typedef struct xfs_qoff_logformat {
199#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */ 197#define XFS_QMOPT_UQUOTA 0x0000004 /* user dquot requested */
200#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */ 198#define XFS_QMOPT_PQUOTA 0x0000008 /* project dquot requested */
201#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */ 199#define XFS_QMOPT_FORCE_RES 0x0000010 /* ignore quota limits */
202#define XFS_QMOPT_DQSUSER 0x0000020 /* don't cache super users dquot */
203#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */ 200#define XFS_QMOPT_SBVERSION 0x0000040 /* change superblock version num */
204#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */ 201#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
205#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ 202#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
@@ -326,7 +323,6 @@ extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
326extern void xfs_qm_dqdetach(struct xfs_inode *); 323extern void xfs_qm_dqdetach(struct xfs_inode *);
327extern void xfs_qm_dqrele(struct xfs_dquot *); 324extern void xfs_qm_dqrele(struct xfs_dquot *);
328extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *); 325extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
329extern int xfs_qm_sync(struct xfs_mount *, int);
330extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *); 326extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
331extern void xfs_qm_mount_quotas(struct xfs_mount *); 327extern void xfs_qm_mount_quotas(struct xfs_mount *);
332extern void xfs_qm_unmount(struct xfs_mount *); 328extern void xfs_qm_unmount(struct xfs_mount *);
@@ -366,10 +362,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
366#define xfs_qm_dqdetach(ip) 362#define xfs_qm_dqdetach(ip)
367#define xfs_qm_dqrele(d) 363#define xfs_qm_dqrele(d)
368#define xfs_qm_statvfs(ip, s) 364#define xfs_qm_statvfs(ip, s)
369static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
370{
371 return 0;
372}
373#define xfs_qm_newmount(mp, a, b) (0) 365#define xfs_qm_newmount(mp, a, b) (0)
374#define xfs_qm_mount_quotas(mp) 366#define xfs_qm_mount_quotas(mp)
375#define xfs_qm_unmount(mp) 367#define xfs_qm_unmount(mp)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 3eca58f51ae9..281961c1d81a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -199,7 +199,6 @@ xfs_parseargs(
199 mp->m_flags |= XFS_MOUNT_BARRIER; 199 mp->m_flags |= XFS_MOUNT_BARRIER;
200 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 200 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
201 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 201 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
202 mp->m_flags |= XFS_MOUNT_DELAYLOG;
203 202
204 /* 203 /*
205 * These can be overridden by the mount option parsing. 204 * These can be overridden by the mount option parsing.
@@ -353,11 +352,11 @@ xfs_parseargs(
353 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); 352 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
354 mp->m_qflags &= ~XFS_OQUOTA_ENFD; 353 mp->m_qflags &= ~XFS_OQUOTA_ENFD;
355 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { 354 } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
356 mp->m_flags |= XFS_MOUNT_DELAYLOG; 355 xfs_warn(mp,
356 "delaylog is the default now, option is deprecated.");
357 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 357 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
358 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
359 xfs_warn(mp, 358 xfs_warn(mp,
360 "nodelaylog is deprecated and will be removed in Linux 3.3"); 359 "nodelaylog support has been removed, option is deprecated.");
361 } else if (!strcmp(this_char, MNTOPT_DISCARD)) { 360 } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
362 mp->m_flags |= XFS_MOUNT_DISCARD; 361 mp->m_flags |= XFS_MOUNT_DISCARD;
363 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { 362 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
@@ -395,13 +394,6 @@ xfs_parseargs(
395 return EINVAL; 394 return EINVAL;
396 } 395 }
397 396
398 if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
399 !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
400 xfs_warn(mp,
401 "the discard option is incompatible with the nodelaylog option");
402 return EINVAL;
403 }
404
405#ifndef CONFIG_XFS_QUOTA 397#ifndef CONFIG_XFS_QUOTA
406 if (XFS_IS_QUOTA_RUNNING(mp)) { 398 if (XFS_IS_QUOTA_RUNNING(mp)) {
407 xfs_warn(mp, "quota support not available in this kernel."); 399 xfs_warn(mp, "quota support not available in this kernel.");
@@ -501,7 +493,6 @@ xfs_showargs(
501 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, 493 { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
502 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, 494 { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
503 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, 495 { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
504 { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
505 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, 496 { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
506 { 0, NULL } 497 { 0, NULL }
507 }; 498 };
@@ -869,27 +860,6 @@ xfs_fs_dirty_inode(
869} 860}
870 861
871STATIC int 862STATIC int
872xfs_log_inode(
873 struct xfs_inode *ip)
874{
875 struct xfs_mount *mp = ip->i_mount;
876 struct xfs_trans *tp;
877 int error;
878
879 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
880 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
881 if (error) {
882 xfs_trans_cancel(tp, 0);
883 return error;
884 }
885
886 xfs_ilock(ip, XFS_ILOCK_EXCL);
887 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
888 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
889 return xfs_trans_commit(tp, 0);
890}
891
892STATIC int
893xfs_fs_write_inode( 863xfs_fs_write_inode(
894 struct inode *inode, 864 struct inode *inode,
895 struct writeback_control *wbc) 865 struct writeback_control *wbc)
@@ -902,10 +872,8 @@ xfs_fs_write_inode(
902 872
903 if (XFS_FORCED_SHUTDOWN(mp)) 873 if (XFS_FORCED_SHUTDOWN(mp))
904 return -XFS_ERROR(EIO); 874 return -XFS_ERROR(EIO);
905 if (!ip->i_update_core)
906 return 0;
907 875
908 if (wbc->sync_mode == WB_SYNC_ALL) { 876 if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
909 /* 877 /*
 910 * Make sure the inode has made it into the log. Instead 878 * Make sure the inode has made it into the log. Instead
911 * of forcing it all the way to stable storage using a 879 * of forcing it all the way to stable storage using a
@@ -913,11 +881,14 @@ xfs_fs_write_inode(
 913 * ->sync_fs call do that for us, which reduces the number 881 * ->sync_fs call do that for us, which reduces the number
914 * of synchronous log forces dramatically. 882 * of synchronous log forces dramatically.
915 */ 883 */
916 error = xfs_log_inode(ip); 884 error = xfs_log_dirty_inode(ip, NULL, 0);
917 if (error) 885 if (error)
918 goto out; 886 goto out;
919 return 0; 887 return 0;
920 } else { 888 } else {
889 if (!ip->i_update_core)
890 return 0;
891
921 /* 892 /*
922 * We make this non-blocking if the inode is contended, return 893 * We make this non-blocking if the inode is contended, return
923 * EAGAIN to indicate to the caller that they did not succeed. 894 * EAGAIN to indicate to the caller that they did not succeed.
@@ -1034,17 +1005,10 @@ xfs_fs_sync_fs(
1034 int error; 1005 int error;
1035 1006
1036 /* 1007 /*
1037 * Not much we can do for the first async pass. Writing out the 1008 * Doing anything during the async pass would be counterproductive.
1038 * superblock would be counter-productive as we are going to redirty
1039 * when writing out other data and metadata (and writing out a single
1040 * block is quite fast anyway).
1041 *
1042 * Try to asynchronously kick off quota syncing at least.
1043 */ 1009 */
1044 if (!wait) { 1010 if (!wait)
1045 xfs_qm_sync(mp, SYNC_TRYLOCK);
1046 return 0; 1011 return 0;
1047 }
1048 1012
1049 error = xfs_quiesce_data(mp); 1013 error = xfs_quiesce_data(mp);
1050 if (error) 1014 if (error)
@@ -1258,9 +1222,9 @@ xfs_fs_unfreeze(
1258STATIC int 1222STATIC int
1259xfs_fs_show_options( 1223xfs_fs_show_options(
1260 struct seq_file *m, 1224 struct seq_file *m,
1261 struct vfsmount *mnt) 1225 struct dentry *root)
1262{ 1226{
1263 return -xfs_showargs(XFS_M(mnt->mnt_sb), m); 1227 return -xfs_showargs(XFS_M(root->d_sb), m);
1264} 1228}
1265 1229
1266/* 1230/*
@@ -1641,12 +1605,12 @@ STATIC int __init
1641xfs_init_workqueues(void) 1605xfs_init_workqueues(void)
1642{ 1606{
1643 /* 1607 /*
1644 * max_active is set to 8 to give enough concurency to allow 1608 * We never want to the same work item to run twice, reclaiming inodes
1645 * multiple work operations on each CPU to run. This allows multiple 1609 * or idling the log is not going to get any faster by multiple CPUs
1646 * filesystems to be running sync work concurrently, and scales with 1610 * competing for ressources. Use the default large max_active value
1647 * the number of CPUs in the system. 1611 * so that even lots of filesystems can perform these task in parallel.
1648 */ 1612 */
1649 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); 1613 xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
1650 if (!xfs_syncd_wq) 1614 if (!xfs_syncd_wq)
1651 return -ENOMEM; 1615 return -ENOMEM;
1652 return 0; 1616 return 0;
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index be5c51d8f757..72c01a1c16e7 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
336 return error; 336 return error;
337} 337}
338 338
339int
340xfs_log_dirty_inode(
341 struct xfs_inode *ip,
342 struct xfs_perag *pag,
343 int flags)
344{
345 struct xfs_mount *mp = ip->i_mount;
346 struct xfs_trans *tp;
347 int error;
348
349 if (!ip->i_update_core)
350 return 0;
351
352 tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
353 error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
354 if (error) {
355 xfs_trans_cancel(tp, 0);
356 return error;
357 }
358
359 xfs_ilock(ip, XFS_ILOCK_EXCL);
360 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
361 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
362 return xfs_trans_commit(tp, 0);
363}
364
339/* 365/*
340 * When remounting a filesystem read-only or freezing the filesystem, we have 366 * When remounting a filesystem read-only or freezing the filesystem, we have
341 * two phases to execute. This first phase is syncing the data before we 367 * two phases to execute. This first phase is syncing the data before we
@@ -359,10 +385,17 @@ xfs_quiesce_data(
359{ 385{
360 int error, error2 = 0; 386 int error, error2 = 0;
361 387
362 xfs_qm_sync(mp, SYNC_TRYLOCK); 388 /*
363 xfs_qm_sync(mp, SYNC_WAIT); 389 * Log all pending size and timestamp updates. The vfs writeback
 390 * code is supposed to do this, but due to its overaggressive
391 * livelock detection it will skip inodes where appending writes
392 * were written out in the first non-blocking sync phase if their
393 * completion took long enough that it happened after taking the
394 * timestamp for the cut-off in the blocking phase.
395 */
396 xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
364 397
365 /* force out the newly dirtied log buffers */ 398 /* force out the log */
366 xfs_log_force(mp, XFS_LOG_SYNC); 399 xfs_log_force(mp, XFS_LOG_SYNC);
367 400
368 /* write superblock and hoover up shutdown errors */ 401 /* write superblock and hoover up shutdown errors */
@@ -470,7 +503,6 @@ xfs_sync_worker(
470 error = xfs_fs_log_dummy(mp); 503 error = xfs_fs_log_dummy(mp);
471 else 504 else
472 xfs_log_force(mp, 0); 505 xfs_log_force(mp, 0);
473 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
474 506
475 /* start pushing all the metadata that is currently dirty */ 507 /* start pushing all the metadata that is currently dirty */
476 xfs_ail_push_all(mp->m_ail); 508 xfs_ail_push_all(mp->m_ail);
diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h
index 941202e7ac6e..fa965479d788 100644
--- a/fs/xfs/xfs_sync.h
+++ b/fs/xfs/xfs_sync.h
@@ -34,6 +34,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
34 34
35void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
36 36
37int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
38
37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 39int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
38int xfs_reclaim_inodes_count(struct xfs_mount *mp); 40int xfs_reclaim_inodes_count(struct xfs_mount *mp);
39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); 41void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 494035798873..a9d5b1e06efe 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -743,8 +743,6 @@ DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
743DEFINE_DQUOT_EVENT(xfs_dqread); 743DEFINE_DQUOT_EVENT(xfs_dqread);
744DEFINE_DQUOT_EVENT(xfs_dqread_fail); 744DEFINE_DQUOT_EVENT(xfs_dqread_fail);
745DEFINE_DQUOT_EVENT(xfs_dqlookup_found); 745DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
746DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
747DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
748DEFINE_DQUOT_EVENT(xfs_dqlookup_done); 746DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
749DEFINE_DQUOT_EVENT(xfs_dqget_hit); 747DEFINE_DQUOT_EVENT(xfs_dqget_hit);
750DEFINE_DQUOT_EVENT(xfs_dqget_miss); 748DEFINE_DQUOT_EVENT(xfs_dqget_miss);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 1f35b2feca97..329b06aba1c2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1158,7 +1158,6 @@ xfs_trans_add_item(
1158 1158
1159 lidp->lid_item = lip; 1159 lidp->lid_item = lip;
1160 lidp->lid_flags = 0; 1160 lidp->lid_flags = 0;
1161 lidp->lid_size = 0;
1162 list_add_tail(&lidp->lid_trans, &tp->t_items); 1161 list_add_tail(&lidp->lid_trans, &tp->t_items);
1163 1162
1164 lip->li_desc = lidp; 1163 lip->li_desc = lidp;
@@ -1210,219 +1209,6 @@ xfs_trans_free_items(
1210 } 1209 }
1211} 1210}
1212 1211
1213/*
1214 * Unlock the items associated with a transaction.
1215 *
1216 * Items which were not logged should be freed. Those which were logged must
1217 * still be tracked so they can be unpinned when the transaction commits.
1218 */
1219STATIC void
1220xfs_trans_unlock_items(
1221 struct xfs_trans *tp,
1222 xfs_lsn_t commit_lsn)
1223{
1224 struct xfs_log_item_desc *lidp, *next;
1225
1226 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1227 struct xfs_log_item *lip = lidp->lid_item;
1228
1229 lip->li_desc = NULL;
1230
1231 if (commit_lsn != NULLCOMMITLSN)
1232 IOP_COMMITTING(lip, commit_lsn);
1233 IOP_UNLOCK(lip);
1234
1235 /*
1236 * Free the descriptor if the item is not dirty
1237 * within this transaction.
1238 */
1239 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1240 xfs_trans_free_item_desc(lidp);
1241 }
1242}
1243
1244/*
1245 * Total up the number of log iovecs needed to commit this
1246 * transaction. The transaction itself needs one for the
1247 * transaction header. Ask each dirty item in turn how many
1248 * it needs to get the total.
1249 */
1250static uint
1251xfs_trans_count_vecs(
1252 struct xfs_trans *tp)
1253{
1254 int nvecs;
1255 struct xfs_log_item_desc *lidp;
1256
1257 nvecs = 1;
1258
1259 /* In the non-debug case we need to start bailing out if we
1260 * didn't find a log_item here, return zero and let trans_commit
1261 * deal with it.
1262 */
1263 if (list_empty(&tp->t_items)) {
1264 ASSERT(0);
1265 return 0;
1266 }
1267
1268 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1269 /*
1270 * Skip items which aren't dirty in this transaction.
1271 */
1272 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1273 continue;
1274 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1275 nvecs += lidp->lid_size;
1276 }
1277
1278 return nvecs;
1279}
1280
1281/*
1282 * Fill in the vector with pointers to data to be logged
1283 * by this transaction. The transaction header takes
1284 * the first vector, and then each dirty item takes the
1285 * number of vectors it indicated it needed in xfs_trans_count_vecs().
1286 *
1287 * As each item fills in the entries it needs, also pin the item
1288 * so that it cannot be flushed out until the log write completes.
1289 */
1290static void
1291xfs_trans_fill_vecs(
1292 struct xfs_trans *tp,
1293 struct xfs_log_iovec *log_vector)
1294{
1295 struct xfs_log_item_desc *lidp;
1296 struct xfs_log_iovec *vecp;
1297 uint nitems;
1298
1299 /*
1300 * Skip over the entry for the transaction header, we'll
1301 * fill that in at the end.
1302 */
1303 vecp = log_vector + 1;
1304
1305 nitems = 0;
1306 ASSERT(!list_empty(&tp->t_items));
1307 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1308 /* Skip items which aren't dirty in this transaction. */
1309 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1310 continue;
1311
1312 /*
1313 * The item may be marked dirty but not log anything. This can
1314 * be used to get called when a transaction is committed.
1315 */
1316 if (lidp->lid_size)
1317 nitems++;
1318 IOP_FORMAT(lidp->lid_item, vecp);
1319 vecp += lidp->lid_size;
1320 IOP_PIN(lidp->lid_item);
1321 }
1322
1323 /*
1324 * Now that we've counted the number of items in this transaction, fill
1325 * in the transaction header. Note that the transaction header does not
1326 * have a log item.
1327 */
1328 tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
1329 tp->t_header.th_type = tp->t_type;
1330 tp->t_header.th_num_items = nitems;
1331 log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
1332 log_vector->i_len = sizeof(xfs_trans_header_t);
1333 log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
1334}
1335
1336/*
1337 * The committed item processing consists of calling the committed routine of
1338 * each logged item, updating the item's position in the AIL if necessary, and
1339 * unpinning each item. If the committed routine returns -1, then do nothing
1340 * further with the item because it may have been freed.
1341 *
1342 * Since items are unlocked when they are copied to the incore log, it is
1343 * possible for two transactions to be completing and manipulating the same
1344 * item simultaneously. The AIL lock will protect the lsn field of each item.
1345 * The value of this field can never go backwards.
1346 *
1347 * We unpin the items after repositioning them in the AIL, because otherwise
1348 * they could be immediately flushed and we'd have to race with the flusher
1349 * trying to pull the item from the AIL as we add it.
1350 */
1351static void
1352xfs_trans_item_committed(
1353 struct xfs_log_item *lip,
1354 xfs_lsn_t commit_lsn,
1355 int aborted)
1356{
1357 xfs_lsn_t item_lsn;
1358 struct xfs_ail *ailp;
1359
1360 if (aborted)
1361 lip->li_flags |= XFS_LI_ABORTED;
1362 item_lsn = IOP_COMMITTED(lip, commit_lsn);
1363
1364 /* item_lsn of -1 means the item needs no further processing */
1365 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
1366 return;
1367
1368 /*
1369 * If the returned lsn is greater than what it contained before, update
1370 * the location of the item in the AIL. If it is not, then do nothing.
1371 * Items can never move backwards in the AIL.
1372 *
1373 * While the new lsn should usually be greater, it is possible that a
1374 * later transaction completing simultaneously with an earlier one
1375 * using the same item could complete first with a higher lsn. This
1376 * would cause the earlier transaction to fail the test below.
1377 */
1378 ailp = lip->li_ailp;
1379 spin_lock(&ailp->xa_lock);
1380 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
1381 /*
1382 * This will set the item's lsn to item_lsn and update the
1383 * position of the item in the AIL.
1384 *
1385 * xfs_trans_ail_update() drops the AIL lock.
1386 */
1387 xfs_trans_ail_update(ailp, lip, item_lsn);
1388 } else {
1389 spin_unlock(&ailp->xa_lock);
1390 }
1391
1392 /*
1393 * Now that we've repositioned the item in the AIL, unpin it so it can
1394 * be flushed. Pass information about buffer stale state down from the
1395 * log item flags, if anyone else stales the buffer we do not want to
1396 * pay any attention to it.
1397 */
1398 IOP_UNPIN(lip, 0);
1399}
1400
1401/*
1402 * This is typically called by the LM when a transaction has been fully
1403 * committed to disk. It needs to unpin the items which have
1404 * been logged by the transaction and update their positions
1405 * in the AIL if necessary.
1406 *
1407 * This also gets called when the transactions didn't get written out
1408 * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
1409 */
1410STATIC void
1411xfs_trans_committed(
1412 void *arg,
1413 int abortflag)
1414{
1415 struct xfs_trans *tp = arg;
1416 struct xfs_log_item_desc *lidp, *next;
1417
1418 list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
1419 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
1420 xfs_trans_free_item_desc(lidp);
1421 }
1422
1423 xfs_trans_free(tp);
1424}
1425
1426static inline void 1212static inline void
1427xfs_log_item_batch_insert( 1213xfs_log_item_batch_insert(
1428 struct xfs_ail *ailp, 1214 struct xfs_ail *ailp,
@@ -1538,258 +1324,6 @@ xfs_trans_committed_bulk(
1538} 1324}
1539 1325
1540/* 1326/*
1541 * Called from the trans_commit code when we notice that the filesystem is in
1542 * the middle of a forced shutdown.
1543 *
1544 * When we are called here, we have already pinned all the items in the
1545 * transaction. However, neither IOP_COMMITTING or IOP_UNLOCK has been called
1546 * so we can simply walk the items in the transaction, unpin them with an abort
1547 * flag and then free the items. Note that unpinning the items can result in
1548 * them being freed immediately, so we need to use a safe list traversal method
1549 * here.
1550 */
1551STATIC void
1552xfs_trans_uncommit(
1553 struct xfs_trans *tp,
1554 uint flags)
1555{
1556 struct xfs_log_item_desc *lidp, *n;
1557
1558 list_for_each_entry_safe(lidp, n, &tp->t_items, lid_trans) {
1559 if (lidp->lid_flags & XFS_LID_DIRTY)
1560 IOP_UNPIN(lidp->lid_item, 1);
1561 }
1562
1563 xfs_trans_unreserve_and_mod_sb(tp);
1564 xfs_trans_unreserve_and_mod_dquots(tp);
1565
1566 xfs_trans_free_items(tp, NULLCOMMITLSN, flags);
1567 xfs_trans_free(tp);
1568}
1569
1570/*
1571 * Format the transaction direct to the iclog. This isolates the physical
1572 * transaction commit operation from the logical operation and hence allows
1573 * other methods to be introduced without affecting the existing commit path.
1574 */
1575static int
1576xfs_trans_commit_iclog(
1577 struct xfs_mount *mp,
1578 struct xfs_trans *tp,
1579 xfs_lsn_t *commit_lsn,
1580 int flags)
1581{
1582 int shutdown;
1583 int error;
1584 int log_flags = 0;
1585 struct xlog_in_core *commit_iclog;
1586#define XFS_TRANS_LOGVEC_COUNT 16
1587 struct xfs_log_iovec log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
1588 struct xfs_log_iovec *log_vector;
1589 uint nvec;
1590
1591
1592 /*
1593 * Ask each log item how many log_vector entries it will
1594 * need so we can figure out how many to allocate.
1595 * Try to avoid the kmem_alloc() call in the common case
1596 * by using a vector from the stack when it fits.
1597 */
1598 nvec = xfs_trans_count_vecs(tp);
1599 if (nvec == 0) {
1600 return ENOMEM; /* triggers a shutdown! */
1601 } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
1602 log_vector = log_vector_fast;
1603 } else {
1604 log_vector = (xfs_log_iovec_t *)kmem_alloc(nvec *
1605 sizeof(xfs_log_iovec_t),
1606 KM_SLEEP);
1607 }
1608
1609 /*
1610 * Fill in the log_vector and pin the logged items, and
1611 * then write the transaction to the log.
1612 */
1613 xfs_trans_fill_vecs(tp, log_vector);
1614
1615 if (flags & XFS_TRANS_RELEASE_LOG_RES)
1616 log_flags = XFS_LOG_REL_PERM_RESERV;
1617
1618 error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
1619
1620 /*
1621 * The transaction is committed incore here, and can go out to disk
1622 * at any time after this call. However, all the items associated
1623 * with the transaction are still locked and pinned in memory.
1624 */
1625 *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
1626
1627 tp->t_commit_lsn = *commit_lsn;
1628 trace_xfs_trans_commit_lsn(tp);
1629
1630 if (nvec > XFS_TRANS_LOGVEC_COUNT)
1631 kmem_free(log_vector);
1632
1633 /*
1634 * If we got a log write error. Unpin the logitems that we
1635 * had pinned, clean up, free trans structure, and return error.
1636 */
1637 if (error || *commit_lsn == -1) {
1638 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1639 xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
1640 return XFS_ERROR(EIO);
1641 }
1642
1643 /*
1644 * Once the transaction has committed, unused
1645 * reservations need to be released and changes to
1646 * the superblock need to be reflected in the in-core
1647 * version. Do that now.
1648 */
1649 xfs_trans_unreserve_and_mod_sb(tp);
1650
1651 /*
1652 * Tell the LM to call the transaction completion routine
1653 * when the log write with LSN commit_lsn completes (e.g.
1654 * when the transaction commit really hits the on-disk log).
1655 * After this call we cannot reference tp, because the call
1656 * can happen at any time and the call will free the transaction
1657 * structure pointed to by tp. The only case where we call
1658 * the completion routine (xfs_trans_committed) directly is
1659 * if the log is turned off on a debug kernel or we're
1660 * running in simulation mode (the log is explicitly turned
1661 * off).
1662 */
1663 tp->t_logcb.cb_func = xfs_trans_committed;
1664 tp->t_logcb.cb_arg = tp;
1665
1666 /*
1667 * We need to pass the iclog buffer which was used for the
1668 * transaction commit record into this function, and attach
1669 * the callback to it. The callback must be attached before
1670 * the items are unlocked to avoid racing with other threads
1671 * waiting for an item to unlock.
1672 */
1673 shutdown = xfs_log_notify(mp, commit_iclog, &(tp->t_logcb));
1674
1675 /*
1676 * Mark this thread as no longer being in a transaction
1677 */
1678 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1679
1680 /*
1681 * Once all the items of the transaction have been copied
1682 * to the in core log and the callback is attached, the
1683 * items can be unlocked.
1684 *
1685 * This will free descriptors pointing to items which were
1686 * not logged since there is nothing more to do with them.
1687 * For items which were logged, we will keep pointers to them
1688 * so they can be unpinned after the transaction commits to disk.
1689 * This will also stamp each modified meta-data item with
1690 * the commit lsn of this transaction for dependency tracking
1691 * purposes.
1692 */
1693 xfs_trans_unlock_items(tp, *commit_lsn);
1694
1695 /*
1696 * If we detected a log error earlier, finish committing
1697 * the transaction now (unpin log items, etc).
1698 *
1699 * Order is critical here, to avoid using the transaction
1700 * pointer after its been freed (by xfs_trans_committed
1701 * either here now, or as a callback). We cannot do this
1702 * step inside xfs_log_notify as was done earlier because
1703 * of this issue.
1704 */
1705 if (shutdown)
1706 xfs_trans_committed(tp, XFS_LI_ABORTED);
1707
1708 /*
1709 * Now that the xfs_trans_committed callback has been attached,
1710 * and the items are released we can finally allow the iclog to
1711 * go to disk.
1712 */
1713 return xfs_log_release_iclog(mp, commit_iclog);
1714}
1715
1716/*
1717 * Walk the log items and allocate log vector structures for
1718 * each item large enough to fit all the vectors they require.
1719 * Note that this format differs from the old log vector format in
1720 * that there is no transaction header in these log vectors.
1721 */
1722STATIC struct xfs_log_vec *
1723xfs_trans_alloc_log_vecs(
1724 xfs_trans_t *tp)
1725{
1726 struct xfs_log_item_desc *lidp;
1727 struct xfs_log_vec *lv = NULL;
1728 struct xfs_log_vec *ret_lv = NULL;
1729
1730
1731 /* Bail out if we didn't find a log item. */
1732 if (list_empty(&tp->t_items)) {
1733 ASSERT(0);
1734 return NULL;
1735 }
1736
1737 list_for_each_entry(lidp, &tp->t_items, lid_trans) {
1738 struct xfs_log_vec *new_lv;
1739
1740 /* Skip items which aren't dirty in this transaction. */
1741 if (!(lidp->lid_flags & XFS_LID_DIRTY))
1742 continue;
1743
1744 /* Skip items that do not have any vectors for writing */
1745 lidp->lid_size = IOP_SIZE(lidp->lid_item);
1746 if (!lidp->lid_size)
1747 continue;
1748
1749 new_lv = kmem_zalloc(sizeof(*new_lv) +
1750 lidp->lid_size * sizeof(struct xfs_log_iovec),
1751 KM_SLEEP);
1752
1753 /* The allocated iovec region lies beyond the log vector. */
1754 new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1];
1755 new_lv->lv_niovecs = lidp->lid_size;
1756 new_lv->lv_item = lidp->lid_item;
1757 if (!ret_lv)
1758 ret_lv = new_lv;
1759 else
1760 lv->lv_next = new_lv;
1761 lv = new_lv;
1762 }
1763
1764 return ret_lv;
1765}
1766
1767static int
1768xfs_trans_commit_cil(
1769 struct xfs_mount *mp,
1770 struct xfs_trans *tp,
1771 xfs_lsn_t *commit_lsn,
1772 int flags)
1773{
1774 struct xfs_log_vec *log_vector;
1775
1776 /*
1777 * Get each log item to allocate a vector structure for
1778 * the log item to to pass to the log write code. The
1779 * CIL commit code will format the vector and save it away.
1780 */
1781 log_vector = xfs_trans_alloc_log_vecs(tp);
1782 if (!log_vector)
1783 return ENOMEM;
1784
1785 xfs_log_commit_cil(mp, tp, log_vector, commit_lsn, flags);
1786
1787 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1788 xfs_trans_free(tp);
1789 return 0;
1790}
1791
1792/*
1793 * Commit the given transaction to the log. 1327 * Commit the given transaction to the log.
1794 * 1328 *
1795 * XFS disk error handling mechanism is not based on a typical 1329 * XFS disk error handling mechanism is not based on a typical
@@ -1845,17 +1379,16 @@ xfs_trans_commit(
1845 xfs_trans_apply_sb_deltas(tp); 1379 xfs_trans_apply_sb_deltas(tp);
1846 xfs_trans_apply_dquot_deltas(tp); 1380 xfs_trans_apply_dquot_deltas(tp);
1847 1381
1848 if (mp->m_flags & XFS_MOUNT_DELAYLOG) 1382 error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
1849 error = xfs_trans_commit_cil(mp, tp, &commit_lsn, flags);
1850 else
1851 error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
1852
1853 if (error == ENOMEM) { 1383 if (error == ENOMEM) {
1854 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); 1384 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
1855 error = XFS_ERROR(EIO); 1385 error = XFS_ERROR(EIO);
1856 goto out_unreserve; 1386 goto out_unreserve;
1857 } 1387 }
1858 1388
1389 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
1390 xfs_trans_free(tp);
1391
1859 /* 1392 /*
1860 * If the transaction needs to be synchronous, then force the 1393 * If the transaction needs to be synchronous, then force the
1861 * log out now and wait for it. 1394 * log out now and wait for it.
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 3ae713c0abd9..f6118703f20d 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -163,9 +163,8 @@ typedef struct xfs_trans_header {
163 */ 163 */
164struct xfs_log_item_desc { 164struct xfs_log_item_desc {
165 struct xfs_log_item *lid_item; 165 struct xfs_log_item *lid_item;
166 ushort lid_size;
167 unsigned char lid_flags;
168 struct list_head lid_trans; 166 struct list_head lid_trans;
167 unsigned char lid_flags;
169}; 168};
170 169
171#define XFS_LID_DIRTY 0x1 170#define XFS_LID_DIRTY 0x1
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 8b32d1a4c5a1..89dbb4a50872 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -53,7 +53,7 @@ xfs_dir_ialloc(
53 output: may be a new transaction. */ 53 output: may be a new transaction. */
54 xfs_inode_t *dp, /* directory within whose allocate 54 xfs_inode_t *dp, /* directory within whose allocate
55 the inode. */ 55 the inode. */
56 mode_t mode, 56 umode_t mode,
57 xfs_nlink_t nlink, 57 xfs_nlink_t nlink,
58 xfs_dev_t rdev, 58 xfs_dev_t rdev,
59 prid_t prid, /* project id */ 59 prid_t prid, /* project id */
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 456fca314933..5eeab4690cfe 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -18,7 +18,7 @@
18#ifndef __XFS_UTILS_H__ 18#ifndef __XFS_UTILS_H__
19#define __XFS_UTILS_H__ 19#define __XFS_UTILS_H__
20 20
21extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, 21extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, umode_t, xfs_nlink_t,
22 xfs_dev_t, prid_t, int, xfs_inode_t **, int *); 22 xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
23extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); 23extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
24extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); 24extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index ce9268a2f56b..f2fea868d4db 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -822,7 +822,7 @@ int
822xfs_create( 822xfs_create(
823 xfs_inode_t *dp, 823 xfs_inode_t *dp,
824 struct xfs_name *name, 824 struct xfs_name *name,
825 mode_t mode, 825 umode_t mode,
826 xfs_dev_t rdev, 826 xfs_dev_t rdev,
827 xfs_inode_t **ipp) 827 xfs_inode_t **ipp)
828{ 828{
@@ -1481,7 +1481,7 @@ xfs_symlink(
1481 xfs_inode_t *dp, 1481 xfs_inode_t *dp,
1482 struct xfs_name *link_name, 1482 struct xfs_name *link_name,
1483 const char *target_path, 1483 const char *target_path,
1484 mode_t mode, 1484 umode_t mode,
1485 xfs_inode_t **ipp) 1485 xfs_inode_t **ipp)
1486{ 1486{
1487 xfs_mount_t *mp = dp->i_mount; 1487 xfs_mount_t *mp = dp->i_mount;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 35d3d513e1e9..0c877cbde142 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -26,7 +26,7 @@ int xfs_release(struct xfs_inode *ip);
26int xfs_inactive(struct xfs_inode *ip); 26int xfs_inactive(struct xfs_inode *ip);
27int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 27int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
28 struct xfs_inode **ipp, struct xfs_name *ci_name); 28 struct xfs_inode **ipp, struct xfs_name *ci_name);
29int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 29int xfs_create(struct xfs_inode *dp, struct xfs_name *name, umode_t mode,
30 xfs_dev_t rdev, struct xfs_inode **ipp); 30 xfs_dev_t rdev, struct xfs_inode **ipp);
31int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 31int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
32 struct xfs_inode *ip); 32 struct xfs_inode *ip);
@@ -35,7 +35,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
35int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 35int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
36 xfs_off_t *offset, filldir_t filldir); 36 xfs_off_t *offset, filldir_t filldir);
37int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 37int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
38 const char *target_path, mode_t mode, struct xfs_inode **ipp); 38 const char *target_path, umode_t mode, struct xfs_inode **ipp);
39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); 39int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
40int xfs_change_file_space(struct xfs_inode *ip, int cmd, 40int xfs_change_file_space(struct xfs_inode *ip, int cmd,
41 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); 41 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);