aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c34
-rw-r--r--fs/9p/cache.c204
-rw-r--r--fs/9p/cache.h64
-rw-r--r--fs/9p/fid.c127
-rw-r--r--fs/9p/fid.h5
-rw-r--r--fs/9p/v9fs.c108
-rw-r--r--fs/9p/v9fs.h59
-rw-r--r--fs/9p/v9fs_vfs.h26
-rw-r--r--fs/9p/vfs_addr.c194
-rw-r--r--fs/9p/vfs_dentry.c47
-rw-r--r--fs/9p/vfs_dir.c1
-rw-r--r--fs/9p/vfs_file.c323
-rw-r--r--fs/9p/vfs_inode.c322
-rw-r--r--fs/9p/vfs_inode_dotl.c206
-rw-r--r--fs/9p/vfs_super.c67
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Makefile3
-rw-r--r--fs/adfs/Kconfig1
-rw-r--r--fs/adfs/adfs.h25
-rw-r--r--fs/adfs/dir.c6
-rw-r--r--fs/adfs/dir_f.c23
-rw-r--r--fs/adfs/dir_fplus.c119
-rw-r--r--fs/adfs/inode.c68
-rw-r--r--fs/adfs/super.c36
-rw-r--r--fs/affs/Makefile2
-rw-r--r--fs/aio.c8
-rw-r--r--fs/attr.c4
-rw-r--r--fs/autofs4/root.c6
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/binfmt_elf.c4
-rw-r--r--fs/bio.c2
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/export.c8
-rw-r--r--fs/btrfs/extent-tree.c35
-rw-r--r--fs/btrfs/extent_io.c33
-rw-r--r--fs/btrfs/file.c114
-rw-r--r--fs/btrfs/inode.c22
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/xattr.c6
-rw-r--r--fs/btrfs/xattr.h3
-rw-r--r--fs/btrfs/zlib.c3
-rw-r--r--fs/cachefiles/namei.c52
-rw-r--r--fs/ceph/debugfs.c6
-rw-r--r--fs/ceph/dir.c52
-rw-r--r--fs/ceph/file.c10
-rw-r--r--fs/ceph/inode.c27
-rw-r--r--fs/ceph/super.c9
-rw-r--r--fs/ceph/super.h67
-rw-r--r--fs/coda/Makefile2
-rw-r--r--fs/coda/sysctl.c8
-rw-r--r--fs/compat.c72
-rw-r--r--fs/dcache.c128
-rw-r--r--fs/debugfs/inode.c26
-rw-r--r--fs/devpts/inode.c21
-rw-r--r--fs/direct-io.c6
-rw-r--r--fs/dlm/ast.c257
-rw-r--r--fs/dlm/ast.h7
-rw-r--r--fs/dlm/config.c4
-rw-r--r--fs/dlm/debug_fs.c4
-rw-r--r--fs/dlm/dlm_internal.h35
-rw-r--r--fs/dlm/lock.c38
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/dlm/rcom.c4
-rw-r--r--fs/dlm/user.c185
-rw-r--r--fs/dlm/user.h3
-rw-r--r--fs/drop_caches.c6
-rw-r--r--fs/eventpoll.c64
-rw-r--r--fs/exec.c20
-rw-r--r--fs/exofs/common.h18
-rw-r--r--fs/exofs/dir.c33
-rw-r--r--fs/exofs/exofs.h6
-rw-r--r--fs/exofs/file.c16
-rw-r--r--fs/exofs/inode.c51
-rw-r--r--fs/exofs/namei.c8
-rw-r--r--fs/exofs/super.c190
-rw-r--r--fs/exportfs/expfs.c11
-rw-r--r--fs/ext2/acl.c2
-rw-r--r--fs/ext2/ext2.h8
-rw-r--r--fs/ext2/ialloc.c5
-rw-r--r--fs/ext2/ioctl.c6
-rw-r--r--fs/ext2/namei.c17
-rw-r--r--fs/ext2/xattr.h6
-rw-r--r--fs/ext2/xattr_security.c5
-rw-r--r--fs/ext3/acl.c2
-rw-r--r--fs/ext3/balloc.c21
-rw-r--r--fs/ext3/ialloc.c5
-rw-r--r--fs/ext3/ioctl.c6
-rw-r--r--fs/ext3/namei.c17
-rw-r--r--fs/ext3/super.c8
-rw-r--r--fs/ext3/xattr.h4
-rw-r--r--fs/ext3/xattr_security.c5
-rw-r--r--fs/ext4/acl.c2
-rw-r--r--fs/ext4/ext4.h12
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/ioctl.c8
-rw-r--r--fs/ext4/namei.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/ext4/xattr.h4
-rw-r--r--fs/ext4/xattr_security.c5
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/fat/namei_vfat.c4
-rw-r--r--fs/fcntl.c39
-rw-r--r--fs/fhandle.c265
-rw-r--r--fs/fifo.c3
-rw-r--r--fs/file_table.c64
-rw-r--r--fs/fuse/cuse.c14
-rw-r--r--fs/fuse/dev.c27
-rw-r--r--fs/fuse/dir.c38
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/generic_acl.c2
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/acl.c7
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/bmap.c20
-rw-r--r--fs/gfs2/dentry.c2
-rw-r--r--fs/gfs2/export.c8
-rw-r--r--fs/gfs2/file.c79
-rw-r--r--fs/gfs2/glock.c410
-rw-r--r--fs/gfs2/glock.h39
-rw-r--r--fs/gfs2/glops.c33
-rw-r--r--fs/gfs2/incore.h7
-rw-r--r--fs/gfs2/inode.c7
-rw-r--r--fs/gfs2/lock_dlm.c14
-rw-r--r--fs/gfs2/log.c32
-rw-r--r--fs/gfs2/lops.c10
-rw-r--r--fs/gfs2/main.c6
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c11
-rw-r--r--fs/gfs2/ops_inode.c10
-rw-r--r--fs/gfs2/quota.c14
-rw-r--r--fs/gfs2/rgrp.c34
-rw-r--r--fs/gfs2/rgrp.h2
-rw-r--r--fs/hfs/dir.c50
-rw-r--r--fs/hfsplus/ioctl.c2
-rw-r--r--fs/hpfs/Kconfig2
-rw-r--r--fs/hpfs/dir.c23
-rw-r--r--fs/hpfs/file.c9
-rw-r--r--fs/hpfs/hpfs_fn.h22
-rw-r--r--fs/hpfs/inode.c9
-rw-r--r--fs/hpfs/namei.c49
-rw-r--r--fs/hpfs/super.c23
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--fs/inode.c54
-rw-r--r--fs/internal.h19
-rw-r--r--fs/ioctl.c21
-rw-r--r--fs/isofs/export.c8
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/compr_zlib.c7
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/security.c5
-rw-r--r--fs/jffs2/write.c18
-rw-r--r--fs/jffs2/xattr.h5
-rw-r--r--fs/jfs/Makefile2
-rw-r--r--fs/jfs/ioctl.c2
-rw-r--r--fs/jfs/jfs_xattr.h5
-rw-r--r--fs/jfs/namei.c13
-rw-r--r--fs/jfs/xattr.c8
-rw-r--r--fs/locks.c1
-rw-r--r--fs/logfs/compr.c2
-rw-r--r--fs/logfs/file.c2
-rw-r--r--fs/minix/Kconfig8
-rw-r--r--fs/minix/minix.h74
-rw-r--r--fs/minix/namei.c8
-rw-r--r--fs/namei.c1528
-rw-r--r--fs/namespace.c353
-rw-r--r--fs/ncpfs/Makefile2
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/client.c131
-rw-r--r--fs/nfs/dir.c13
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/getroot.c42
-rw-r--r--fs/nfs/idmap.c90
-rw-r--r--fs/nfs/inode.c9
-rw-r--r--fs/nfs/internal.h43
-rw-r--r--fs/nfs/namespace.c66
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs4_fs.h38
-rw-r--r--fs/nfs/nfs4filelayout.c361
-rw-r--r--fs/nfs/nfs4filelayout.h19
-rw-r--r--fs/nfs/nfs4filelayoutdev.c256
-rw-r--r--fs/nfs/nfs4namespace.c41
-rw-r--r--fs/nfs/nfs4proc.c254
-rw-r--r--fs/nfs/nfs4renewd.c6
-rw-r--r--fs/nfs/nfs4state.c35
-rw-r--r--fs/nfs/nfs4xdr.c42
-rw-r--r--fs/nfs/nfsroot.c29
-rw-r--r--fs/nfs/pagelist.c22
-rw-r--r--fs/nfs/pnfs.c330
-rw-r--r--fs/nfs/pnfs.h118
-rw-r--r--fs/nfs/proc.c1
-rw-r--r--fs/nfs/read.c127
-rw-r--r--fs/nfs/super.c478
-rw-r--r--fs/nfs/unlink.c22
-rw-r--r--fs/nfs/write.c155
-rw-r--r--fs/nfsctl.c21
-rw-r--r--fs/nfsd/vfs.c2
-rw-r--r--fs/nilfs2/alloc.c12
-rw-r--r--fs/nilfs2/alloc.h2
-rw-r--r--fs/nilfs2/bmap.c12
-rw-r--r--fs/nilfs2/bmap.h3
-rw-r--r--fs/nilfs2/btree.c6
-rw-r--r--fs/nilfs2/dir.c5
-rw-r--r--fs/nilfs2/direct.c4
-rw-r--r--fs/nilfs2/file.c4
-rw-r--r--fs/nilfs2/inode.c83
-rw-r--r--fs/nilfs2/ioctl.c115
-rw-r--r--fs/nilfs2/mdt.h2
-rw-r--r--fs/nilfs2/namei.c10
-rw-r--r--fs/nilfs2/nilfs.h33
-rw-r--r--fs/nilfs2/recovery.c32
-rw-r--r--fs/nilfs2/sb.h85
-rw-r--r--fs/nilfs2/segment.c261
-rw-r--r--fs/nilfs2/segment.h14
-rw-r--r--fs/nilfs2/super.c214
-rw-r--r--fs/nilfs2/the_nilfs.c44
-rw-r--r--fs/nilfs2/the_nilfs.h51
-rw-r--r--fs/notify/fanotify/fanotify_user.c2
-rw-r--r--fs/notify/inotify/inotify_user.c2
-rw-r--r--fs/ntfs/Makefile19
-rw-r--r--fs/ocfs2/Makefile4
-rw-r--r--fs/ocfs2/acl.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/export.c8
-rw-r--r--fs/ocfs2/ioctl.c2
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/ocfs2.h10
-rw-r--r--fs/ocfs2/quota.h3
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/refcounttree.c5
-rw-r--r--fs/ocfs2/super.c7
-rw-r--r--fs/ocfs2/xattr.c10
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/omfs/dir.c66
-rw-r--r--fs/open.c150
-rw-r--r--fs/partitions/osf.c12
-rw-r--r--fs/proc/array.c4
-rw-r--r--fs/proc/base.c208
-rw-r--r--fs/proc/generic.c8
-rw-r--r--fs/proc/inode.c10
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/proc_devtree.c2
-rw-r--r--fs/proc/proc_sysctl.c8
-rw-r--r--fs/proc/root.c32
-rw-r--r--fs/proc/task_mmu.c135
-rw-r--r--fs/proc/task_nommu.c6
-rw-r--r--fs/pstore/Kconfig13
-rw-r--r--fs/pstore/Makefile7
-rw-r--r--fs/pstore/inode.c311
-rw-r--r--fs/pstore/internal.h6
-rw-r--r--fs/pstore/platform.c192
-rw-r--r--fs/quota/quota_v2.c2
-rw-r--r--fs/reiserfs/Makefile4
-rw-r--r--fs/reiserfs/inode.c7
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c15
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/reiserfs/xattr_security.c3
-rw-r--r--fs/select.c3
-rw-r--r--fs/squashfs/Kconfig12
-rw-r--r--fs/squashfs/decompressor.c34
-rw-r--r--fs/squashfs/decompressor.h7
-rw-r--r--fs/squashfs/dir.c9
-rw-r--r--fs/squashfs/lzo_wrapper.c4
-rw-r--r--fs/squashfs/namei.c12
-rw-r--r--fs/squashfs/squashfs.h1
-rw-r--r--fs/squashfs/squashfs_fs.h4
-rw-r--r--fs/squashfs/super.c15
-rw-r--r--fs/squashfs/xz_wrapper.c53
-rw-r--r--fs/squashfs/zlib_wrapper.c10
-rw-r--r--fs/stat.c7
-rw-r--r--fs/statfs.c176
-rw-r--r--fs/super.c159
-rw-r--r--fs/sync.c24
-rw-r--r--fs/sysv/namei.c8
-rw-r--r--fs/ubifs/Kconfig23
-rw-r--r--fs/ubifs/commit.c58
-rw-r--r--fs/ubifs/debug.c34
-rw-r--r--fs/ubifs/debug.h30
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/ubifs/io.c201
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/journal.c28
-rw-r--r--fs/ubifs/lprops.c26
-rw-r--r--fs/ubifs/lpt_commit.c56
-rw-r--r--fs/ubifs/orphan.c10
-rw-r--r--fs/ubifs/recovery.c44
-rw-r--r--fs/ubifs/scan.c2
-rw-r--r--fs/ubifs/super.c54
-rw-r--r--fs/ubifs/tnc.c10
-rw-r--r--fs/ubifs/ubifs.h45
-rw-r--r--fs/udf/balloc.c11
-rw-r--r--fs/udf/file.c7
-rw-r--r--fs/udf/inode.c239
-rw-r--r--fs/udf/namei.c18
-rw-r--r--fs/udf/truncate.c146
-rw-r--r--fs/udf/udfdecl.h12
-rw-r--r--fs/ufs/Kconfig1
-rw-r--r--fs/ufs/inode.c78
-rw-r--r--fs/ufs/namei.c44
-rw-r--r--fs/ufs/super.c64
-rw-r--r--fs/ufs/truncate.c5
-rw-r--r--fs/ufs/ufs.h6
-rw-r--r--fs/ufs/util.c2
-rw-r--r--fs/ufs/util.h2
-rw-r--r--fs/utimes.c2
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/Makefile12
-rw-r--r--fs/xfs/linux-2.6/kmem.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c22
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c9
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h23
-rw-r--r--fs/xfs/linux-2.6/xfs_message.c133
-rw-r--r--fs/xfs/linux-2.6/xfs_message.h38
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c128
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c2
-rw-r--r--fs/xfs/quota/xfs_dquot.c48
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c5
-rw-r--r--fs/xfs/quota/xfs_qm.c49
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c3
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c85
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c5
-rw-r--r--fs/xfs/support/debug.c107
-rw-r--r--fs/xfs/support/debug.h61
-rw-r--r--fs/xfs/xfs_alloc.c158
-rw-r--r--fs/xfs/xfs_bmap.c24
-rw-r--r--fs/xfs/xfs_buf_item.c15
-rw-r--r--fs/xfs/xfs_da_btree.c9
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_dir2_node.c25
-rw-r--r--fs/xfs/xfs_error.c22
-rw-r--r--fs/xfs/xfs_error.h19
-rw-r--r--fs/xfs/xfs_fsops.c6
-rw-r--r--fs/xfs/xfs_ialloc.c82
-rw-r--r--fs/xfs/xfs_inode.c129
-rw-r--r--fs/xfs/xfs_inode.h23
-rw-r--r--fs/xfs/xfs_iomap.c12
-rw-r--r--fs/xfs/xfs_log.c124
-rw-r--r--fs/xfs/xfs_log_priv.h4
-rw-r--r--fs/xfs/xfs_log_recover.c223
-rw-r--r--fs/xfs/xfs_mount.c148
-rw-r--r--fs/xfs/xfs_mru_cache.c2
-rw-r--r--fs/xfs/xfs_quota.h3
-rw-r--r--fs/xfs/xfs_rtalloc.c92
-rw-r--r--fs/xfs/xfs_rtalloc.h2
-rw-r--r--fs/xfs/xfs_rw.c58
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_trans_buf.c6
-rw-r--r--fs/xfs/xfs_trans_inode.c22
-rw-r--r--fs/xfs/xfs_vnodeops.c74
369 files changed, 9954 insertions, 6541 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf616318..535ab6eccb1a 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@
21#include <linux/posix_acl_xattr.h> 21#include <linux/posix_acl_xattr.h>
22#include "xattr.h" 22#include "xattr.h"
23#include "acl.h" 23#include "acl.h"
24#include "v9fs_vfs.h"
25#include "v9fs.h" 24#include "v9fs.h"
25#include "v9fs_vfs.h"
26 26
27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) 27static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
28{ 28{
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
59 struct v9fs_session_info *v9ses; 59 struct v9fs_session_info *v9ses;
60 60
61 v9ses = v9fs_inode2v9ses(inode); 61 v9ses = v9fs_inode2v9ses(inode);
62 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 62 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
63 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
63 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); 64 set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
64 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); 65 set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
65 return 0; 66 return 0;
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
71 if (!IS_ERR(dacl) && !IS_ERR(pacl)) { 72 if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
72 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); 73 set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
73 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); 74 set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
74 posix_acl_release(dacl);
75 posix_acl_release(pacl);
76 } else 75 } else
77 retval = -EIO; 76 retval = -EIO;
78 77
78 if (!IS_ERR(dacl))
79 posix_acl_release(dacl);
80
81 if (!IS_ERR(pacl))
82 posix_acl_release(pacl);
83
79 return retval; 84 return retval;
80} 85}
81 86
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
100 return -ECHILD; 105 return -ECHILD;
101 106
102 v9ses = v9fs_inode2v9ses(inode); 107 v9ses = v9fs_inode2v9ses(inode);
103 if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { 108 if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
109 ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
104 /* 110 /*
105 * On access = client mode get the acl 111 * On access = client and acl = on mode get the acl
106 * values from the server 112 * values from the server
107 */ 113 */
108 return 0; 114 return 0;
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
128 struct inode *inode = dentry->d_inode; 134 struct inode *inode = dentry->d_inode;
129 135
130 set_cached_acl(inode, type, acl); 136 set_cached_acl(inode, type, acl);
137
138 if (!acl)
139 return 0;
140
131 /* Set a setxattr request to server */ 141 /* Set a setxattr request to server */
132 size = posix_acl_xattr_size(acl->a_count); 142 size = posix_acl_xattr_size(acl->a_count);
133 buffer = kmalloc(size, GFP_KERNEL); 143 buffer = kmalloc(size, GFP_KERNEL);
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry)
177int v9fs_set_create_acl(struct dentry *dentry, 187int v9fs_set_create_acl(struct dentry *dentry,
178 struct posix_acl *dpacl, struct posix_acl *pacl) 188 struct posix_acl *dpacl, struct posix_acl *pacl)
179{ 189{
180 if (dpacl) 190 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
181 v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); 191 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
182 if (pacl)
183 v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
184 posix_acl_release(dpacl); 192 posix_acl_release(dpacl);
185 posix_acl_release(pacl); 193 posix_acl_release(pacl);
186 return 0; 194 return 0;
@@ -254,7 +262,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
254 if (strcmp(name, "") != 0) 262 if (strcmp(name, "") != 0)
255 return -EINVAL; 263 return -EINVAL;
256 264
257 v9ses = v9fs_inode2v9ses(dentry->d_inode); 265 v9ses = v9fs_dentry2v9ses(dentry);
258 /* 266 /*
259 * We allow set/get/list of acl when access=client is not specified 267 * We allow set/get/list of acl when access=client is not specified
260 */ 268 */
@@ -304,7 +312,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
304 if (strcmp(name, "") != 0) 312 if (strcmp(name, "") != 0)
305 return -EINVAL; 313 return -EINVAL;
306 314
307 v9ses = v9fs_inode2v9ses(dentry->d_inode); 315 v9ses = v9fs_dentry2v9ses(dentry);
308 /* 316 /*
309 * set the attribute on the remote. Without even looking at the 317 * set the attribute on the remote. Without even looking at the
310 * xattr value. We leave it to the server to validate 318 * xattr value. We leave it to the server to validate
@@ -315,7 +323,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
315 323
316 if (S_ISLNK(inode->i_mode)) 324 if (S_ISLNK(inode->i_mode))
317 return -EOPNOTSUPP; 325 return -EOPNOTSUPP;
318 if (!is_owner_or_cap(inode)) 326 if (!inode_owner_or_capable(inode))
319 return -EPERM; 327 return -EPERM;
320 if (value) { 328 if (value) {
321 /* update the cached acl value */ 329 /* update the cached acl value */
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d139ac2..5b335c5086a1 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@
33 33
34#define CACHETAG_LEN 11 34#define CACHETAG_LEN 11
35 35
36struct kmem_cache *vcookie_cache;
37
38struct fscache_netfs v9fs_cache_netfs = { 36struct fscache_netfs v9fs_cache_netfs = {
39 .name = "9p", 37 .name = "9p",
40 .version = 0, 38 .version = 0,
41}; 39};
42 40
43static void init_once(void *foo)
44{
45 struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo;
46 vcookie->fscache = NULL;
47 vcookie->qid = NULL;
48 inode_init_once(&vcookie->inode);
49}
50
51/**
52 * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain
53 * vcookie to inode mapping
54 *
55 * Returns 0 on success.
56 */
57
58static int v9fs_init_vcookiecache(void)
59{
60 vcookie_cache = kmem_cache_create("vcookie_cache",
61 sizeof(struct v9fs_cookie),
62 0, (SLAB_RECLAIM_ACCOUNT|
63 SLAB_MEM_SPREAD),
64 init_once);
65 if (!vcookie_cache)
66 return -ENOMEM;
67
68 return 0;
69}
70
71/**
72 * v9fs_destroy_vcookiecache - destroy the cache of vcookies
73 *
74 */
75
76static void v9fs_destroy_vcookiecache(void)
77{
78 kmem_cache_destroy(vcookie_cache);
79}
80
81int __v9fs_cache_register(void)
82{
83 int ret;
84 ret = v9fs_init_vcookiecache();
85 if (ret < 0)
86 return ret;
87
88 return fscache_register_netfs(&v9fs_cache_netfs);
89}
90
91void __v9fs_cache_unregister(void)
92{
93 v9fs_destroy_vcookiecache();
94 fscache_unregister_netfs(&v9fs_cache_netfs);
95}
96
97/** 41/**
98 * v9fs_random_cachetag - Generate a random tag to be associated 42 * v9fs_random_cachetag - Generate a random tag to be associated
99 * with a new cache session. 43 * with a new cache session.
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data,
133} 77}
134 78
135const struct fscache_cookie_def v9fs_cache_session_index_def = { 79const struct fscache_cookie_def v9fs_cache_session_index_def = {
136 .name = "9P.session", 80 .name = "9P.session",
137 .type = FSCACHE_COOKIE_TYPE_INDEX, 81 .type = FSCACHE_COOKIE_TYPE_INDEX,
138 .get_key = v9fs_cache_session_get_key, 82 .get_key = v9fs_cache_session_get_key,
139}; 83};
140 84
141void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) 85void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses)
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses)
163static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, 107static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
164 void *buffer, uint16_t bufmax) 108 void *buffer, uint16_t bufmax)
165{ 109{
166 const struct v9fs_cookie *vcookie = cookie_netfs_data; 110 const struct v9fs_inode *v9inode = cookie_netfs_data;
167 memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path)); 111 memcpy(buffer, &v9inode->fscache_key->path,
168 112 sizeof(v9inode->fscache_key->path));
169 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode, 113 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
170 vcookie->qid->path); 114 v9inode->fscache_key->path);
171 return sizeof(vcookie->qid->path); 115 return sizeof(v9inode->fscache_key->path);
172} 116}
173 117
174static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, 118static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
175 uint64_t *size) 119 uint64_t *size)
176{ 120{
177 const struct v9fs_cookie *vcookie = cookie_netfs_data; 121 const struct v9fs_inode *v9inode = cookie_netfs_data;
178 *size = i_size_read(&vcookie->inode); 122 *size = i_size_read(&v9inode->vfs_inode);
179 123
180 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode, 124 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode,
181 *size); 125 *size);
182} 126}
183 127
184static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, 128static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
185 void *buffer, uint16_t buflen) 129 void *buffer, uint16_t buflen)
186{ 130{
187 const struct v9fs_cookie *vcookie = cookie_netfs_data; 131 const struct v9fs_inode *v9inode = cookie_netfs_data;
188 memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version)); 132 memcpy(buffer, &v9inode->fscache_key->version,
189 133 sizeof(v9inode->fscache_key->version));
190 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode, 134 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
191 vcookie->qid->version); 135 v9inode->fscache_key->version);
192 return sizeof(vcookie->qid->version); 136 return sizeof(v9inode->fscache_key->version);
193} 137}
194 138
195static enum 139static enum
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
197 const void *buffer, 141 const void *buffer,
198 uint16_t buflen) 142 uint16_t buflen)
199{ 143{
200 const struct v9fs_cookie *vcookie = cookie_netfs_data; 144 const struct v9fs_inode *v9inode = cookie_netfs_data;
201 145
202 if (buflen != sizeof(vcookie->qid->version)) 146 if (buflen != sizeof(v9inode->fscache_key->version))
203 return FSCACHE_CHECKAUX_OBSOLETE; 147 return FSCACHE_CHECKAUX_OBSOLETE;
204 148
205 if (memcmp(buffer, &vcookie->qid->version, 149 if (memcmp(buffer, &v9inode->fscache_key->version,
206 sizeof(vcookie->qid->version))) 150 sizeof(v9inode->fscache_key->version)))
207 return FSCACHE_CHECKAUX_OBSOLETE; 151 return FSCACHE_CHECKAUX_OBSOLETE;
208 152
209 return FSCACHE_CHECKAUX_OKAY; 153 return FSCACHE_CHECKAUX_OKAY;
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
211 155
212static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) 156static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
213{ 157{
214 struct v9fs_cookie *vcookie = cookie_netfs_data; 158 struct v9fs_inode *v9inode = cookie_netfs_data;
215 struct pagevec pvec; 159 struct pagevec pvec;
216 pgoff_t first; 160 pgoff_t first;
217 int loop, nr_pages; 161 int loop, nr_pages;
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data)
220 first = 0; 164 first = 0;
221 165
222 for (;;) { 166 for (;;) {
223 nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping, 167 nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping,
224 first, 168 first,
225 PAGEVEC_SIZE - pagevec_count(&pvec)); 169 PAGEVEC_SIZE - pagevec_count(&pvec));
226 if (!nr_pages) 170 if (!nr_pages)
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = {
249 193
250void v9fs_cache_inode_get_cookie(struct inode *inode) 194void v9fs_cache_inode_get_cookie(struct inode *inode)
251{ 195{
252 struct v9fs_cookie *vcookie; 196 struct v9fs_inode *v9inode;
253 struct v9fs_session_info *v9ses; 197 struct v9fs_session_info *v9ses;
254 198
255 if (!S_ISREG(inode->i_mode)) 199 if (!S_ISREG(inode->i_mode))
256 return; 200 return;
257 201
258 vcookie = v9fs_inode2cookie(inode); 202 v9inode = V9FS_I(inode);
259 if (vcookie->fscache) 203 if (v9inode->fscache)
260 return; 204 return;
261 205
262 v9ses = v9fs_inode2v9ses(inode); 206 v9ses = v9fs_inode2v9ses(inode);
263 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 207 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
264 &v9fs_cache_inode_index_def, 208 &v9fs_cache_inode_index_def,
265 vcookie); 209 v9inode);
266 210
267 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, 211 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode,
268 vcookie->fscache); 212 v9inode->fscache);
269} 213}
270 214
271void v9fs_cache_inode_put_cookie(struct inode *inode) 215void v9fs_cache_inode_put_cookie(struct inode *inode)
272{ 216{
273 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 217 struct v9fs_inode *v9inode = V9FS_I(inode);
274 218
275 if (!vcookie->fscache) 219 if (!v9inode->fscache)
276 return; 220 return;
277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, 221 P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode,
278 vcookie->fscache); 222 v9inode->fscache);
279 223
280 fscache_relinquish_cookie(vcookie->fscache, 0); 224 fscache_relinquish_cookie(v9inode->fscache, 0);
281 vcookie->fscache = NULL; 225 v9inode->fscache = NULL;
282} 226}
283 227
284void v9fs_cache_inode_flush_cookie(struct inode *inode) 228void v9fs_cache_inode_flush_cookie(struct inode *inode)
285{ 229{
286 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 230 struct v9fs_inode *v9inode = V9FS_I(inode);
287 231
288 if (!vcookie->fscache) 232 if (!v9inode->fscache)
289 return; 233 return;
290 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, 234 P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode,
291 vcookie->fscache); 235 v9inode->fscache);
292 236
293 fscache_relinquish_cookie(vcookie->fscache, 1); 237 fscache_relinquish_cookie(v9inode->fscache, 1);
294 vcookie->fscache = NULL; 238 v9inode->fscache = NULL;
295} 239}
296 240
297void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) 241void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp)
298{ 242{
299 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 243 struct v9fs_inode *v9inode = V9FS_I(inode);
300 struct p9_fid *fid; 244 struct p9_fid *fid;
301 245
302 if (!vcookie->fscache) 246 if (!v9inode->fscache)
303 return; 247 return;
304 248
305 spin_lock(&vcookie->lock); 249 spin_lock(&v9inode->fscache_lock);
306 fid = filp->private_data; 250 fid = filp->private_data;
307 if ((filp->f_flags & O_ACCMODE) != O_RDONLY) 251 if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
308 v9fs_cache_inode_flush_cookie(inode); 252 v9fs_cache_inode_flush_cookie(inode);
309 else 253 else
310 v9fs_cache_inode_get_cookie(inode); 254 v9fs_cache_inode_get_cookie(inode);
311 255
312 spin_unlock(&vcookie->lock); 256 spin_unlock(&v9inode->fscache_lock);
313} 257}
314 258
315void v9fs_cache_inode_reset_cookie(struct inode *inode) 259void v9fs_cache_inode_reset_cookie(struct inode *inode)
316{ 260{
317 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 261 struct v9fs_inode *v9inode = V9FS_I(inode);
318 struct v9fs_session_info *v9ses; 262 struct v9fs_session_info *v9ses;
319 struct fscache_cookie *old; 263 struct fscache_cookie *old;
320 264
321 if (!vcookie->fscache) 265 if (!v9inode->fscache)
322 return; 266 return;
323 267
324 old = vcookie->fscache; 268 old = v9inode->fscache;
325 269
326 spin_lock(&vcookie->lock); 270 spin_lock(&v9inode->fscache_lock);
327 fscache_relinquish_cookie(vcookie->fscache, 1); 271 fscache_relinquish_cookie(v9inode->fscache, 1);
328 272
329 v9ses = v9fs_inode2v9ses(inode); 273 v9ses = v9fs_inode2v9ses(inode);
330 vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, 274 v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
331 &v9fs_cache_inode_index_def, 275 &v9fs_cache_inode_index_def,
332 vcookie); 276 v9inode);
333
334 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", 277 P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p",
335 inode, old, vcookie->fscache); 278 inode, old, v9inode->fscache);
336 279
337 spin_unlock(&vcookie->lock); 280 spin_unlock(&v9inode->fscache_lock);
338} 281}
339 282
340int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) 283int __v9fs_fscache_release_page(struct page *page, gfp_t gfp)
341{ 284{
342 struct inode *inode = page->mapping->host; 285 struct inode *inode = page->mapping->host;
343 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 286 struct v9fs_inode *v9inode = V9FS_I(inode);
344 287
345 BUG_ON(!vcookie->fscache); 288 BUG_ON(!v9inode->fscache);
346 289
347 return fscache_maybe_release_page(vcookie->fscache, page, gfp); 290 return fscache_maybe_release_page(v9inode->fscache, page, gfp);
348} 291}
349 292
350void __v9fs_fscache_invalidate_page(struct page *page) 293void __v9fs_fscache_invalidate_page(struct page *page)
351{ 294{
352 struct inode *inode = page->mapping->host; 295 struct inode *inode = page->mapping->host;
353 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 296 struct v9fs_inode *v9inode = V9FS_I(inode);
354 297
355 BUG_ON(!vcookie->fscache); 298 BUG_ON(!v9inode->fscache);
356 299
357 if (PageFsCache(page)) { 300 if (PageFsCache(page)) {
358 fscache_wait_on_page_write(vcookie->fscache, page); 301 fscache_wait_on_page_write(v9inode->fscache, page);
359 BUG_ON(!PageLocked(page)); 302 BUG_ON(!PageLocked(page));
360 fscache_uncache_page(vcookie->fscache, page); 303 fscache_uncache_page(v9inode->fscache, page);
361 } 304 }
362} 305}
363 306
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data,
380int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) 323int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page)
381{ 324{
382 int ret; 325 int ret;
383 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 326 const struct v9fs_inode *v9inode = V9FS_I(inode);
384 327
385 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 328 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
386 if (!vcookie->fscache) 329 if (!v9inode->fscache)
387 return -ENOBUFS; 330 return -ENOBUFS;
388 331
389 ret = fscache_read_or_alloc_page(vcookie->fscache, 332 ret = fscache_read_or_alloc_page(v9inode->fscache,
390 page, 333 page,
391 v9fs_vfs_readpage_complete, 334 v9fs_vfs_readpage_complete,
392 NULL, 335 NULL,
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
418 unsigned *nr_pages) 361 unsigned *nr_pages)
419{ 362{
420 int ret; 363 int ret;
421 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 364 const struct v9fs_inode *v9inode = V9FS_I(inode);
422 365
423 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); 366 P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages);
424 if (!vcookie->fscache) 367 if (!v9inode->fscache)
425 return -ENOBUFS; 368 return -ENOBUFS;
426 369
427 ret = fscache_read_or_alloc_pages(vcookie->fscache, 370 ret = fscache_read_or_alloc_pages(v9inode->fscache,
428 mapping, pages, nr_pages, 371 mapping, pages, nr_pages,
429 v9fs_vfs_readpage_complete, 372 v9fs_vfs_readpage_complete,
430 NULL, 373 NULL,
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode,
453void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) 396void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page)
454{ 397{
455 int ret; 398 int ret;
456 const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 399 const struct v9fs_inode *v9inode = V9FS_I(inode);
457 400
458 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); 401 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
459 ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL); 402 ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL);
460 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); 403 P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret);
461 if (ret != 0) 404 if (ret != 0)
462 v9fs_uncache_page(inode, page); 405 v9fs_uncache_page(inode, page);
463} 406}
407
408/*
409 * wait for a page to complete writing to the cache
410 */
411void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page)
412{
413 const struct v9fs_inode *v9inode = V9FS_I(inode);
414 P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page);
415 if (PageFsCache(page))
416 fscache_wait_on_page_write(v9inode->fscache, page);
417}
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192bfaee8..049507a5b01c 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@
25#include <linux/fscache.h> 25#include <linux/fscache.h>
26#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27 27
28extern struct kmem_cache *vcookie_cache;
29
30struct v9fs_cookie {
31 spinlock_t lock;
32 struct inode inode;
33 struct fscache_cookie *fscache;
34 struct p9_qid *qid;
35};
36
37static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode)
38{
39 return container_of(inode, struct v9fs_cookie, inode);
40}
41
42extern struct fscache_netfs v9fs_cache_netfs; 28extern struct fscache_netfs v9fs_cache_netfs;
43extern const struct fscache_cookie_def v9fs_cache_session_index_def; 29extern const struct fscache_cookie_def v9fs_cache_session_index_def;
44extern const struct fscache_cookie_def v9fs_cache_inode_index_def; 30extern const struct fscache_cookie_def v9fs_cache_inode_index_def;
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode,
64 struct list_head *pages, 50 struct list_head *pages,
65 unsigned *nr_pages); 51 unsigned *nr_pages);
66extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); 52extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page);
67 53extern void __v9fs_fscache_wait_on_page_write(struct inode *inode,
68 54 struct page *page);
69/**
70 * v9fs_cache_register - Register v9fs file system with the cache
71 */
72static inline int v9fs_cache_register(void)
73{
74 return __v9fs_cache_register();
75}
76
77/**
78 * v9fs_cache_unregister - Unregister v9fs from the cache
79 */
80static inline void v9fs_cache_unregister(void)
81{
82 __v9fs_cache_unregister();
83}
84 55
85static inline int v9fs_fscache_release_page(struct page *page, 56static inline int v9fs_fscache_release_page(struct page *page,
86 gfp_t gfp) 57 gfp_t gfp)
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
117 88
118static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 89static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
119{ 90{
120 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 91 struct v9fs_inode *v9inode = V9FS_I(inode);
121 fscache_uncache_page(vcookie->fscache, page); 92 fscache_uncache_page(v9inode->fscache, page);
122 BUG_ON(PageFsCache(page)); 93 BUG_ON(PageFsCache(page));
123} 94}
124 95
125static inline void v9fs_vcookie_set_qid(struct inode *inode, 96static inline void v9fs_fscache_set_key(struct inode *inode,
126 struct p9_qid *qid) 97 struct p9_qid *qid)
127{ 98{
128 struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); 99 struct v9fs_inode *v9inode = V9FS_I(inode);
129 spin_lock(&vcookie->lock); 100 spin_lock(&v9inode->fscache_lock);
130 vcookie->qid = qid; 101 v9inode->fscache_key = qid;
131 spin_unlock(&vcookie->lock); 102 spin_unlock(&v9inode->fscache_lock);
132} 103}
133 104
134#else /* CONFIG_9P_FSCACHE */ 105static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
135 106 struct page *page)
136static inline int v9fs_cache_register(void)
137{ 107{
138 return 1; 108 return __v9fs_fscache_wait_on_page_write(inode, page);
139} 109}
140 110
141static inline void v9fs_cache_unregister(void) {} 111#else /* CONFIG_9P_FSCACHE */
142 112
143static inline int v9fs_fscache_release_page(struct page *page, 113static inline int v9fs_fscache_release_page(struct page *page,
144 gfp_t gfp) { 114 gfp_t gfp) {
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode,
168static inline void v9fs_uncache_page(struct inode *inode, struct page *page) 138static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
169{} 139{}
170 140
171static inline void v9fs_vcookie_set_qid(struct inode *inode, 141static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
172 struct p9_qid *qid) 142 struct page *page)
173{} 143{
144 return;
145}
174 146
175#endif /* CONFIG_9P_FSCACHE */ 147#endif /* CONFIG_9P_FSCACHE */
176#endif /* _9P_CACHE_H */ 148#endif /* _9P_CACHE_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c99d70..0ee594569dcc 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out:
125 return -ENOMEM; 125 return -ENOMEM;
126} 126}
127 127
128/** 128static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
129 * v9fs_fid_lookup - lookup for a fid, try to walk if not found 129 uid_t uid, int any)
130 * @dentry: dentry to look for fid in
131 *
132 * Look for a fid in the specified dentry for the current user.
133 * If no fid is found, try to create one walking from a fid from the parent
134 * dentry (if it has one), or the root dentry. If the user haven't accessed
135 * the fs yet, attach now and walk from the root.
136 */
137
138struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
139{ 130{
140 int i, n, l, clone, any, access;
141 u32 uid;
142 struct p9_fid *fid, *old_fid = NULL;
143 struct dentry *ds; 131 struct dentry *ds;
144 struct v9fs_session_info *v9ses;
145 char **wnames, *uname; 132 char **wnames, *uname;
133 int i, n, l, clone, access;
134 struct v9fs_session_info *v9ses;
135 struct p9_fid *fid, *old_fid = NULL;
146 136
147 v9ses = v9fs_inode2v9ses(dentry->d_inode); 137 v9ses = v9fs_dentry2v9ses(dentry);
148 access = v9ses->flags & V9FS_ACCESS_MASK; 138 access = v9ses->flags & V9FS_ACCESS_MASK;
149 switch (access) {
150 case V9FS_ACCESS_SINGLE:
151 case V9FS_ACCESS_USER:
152 case V9FS_ACCESS_CLIENT:
153 uid = current_fsuid();
154 any = 0;
155 break;
156
157 case V9FS_ACCESS_ANY:
158 uid = v9ses->uid;
159 any = 1;
160 break;
161
162 default:
163 uid = ~0;
164 any = 0;
165 break;
166 }
167
168 fid = v9fs_fid_find(dentry, uid, any); 139 fid = v9fs_fid_find(dentry, uid, any);
169 if (fid) 140 if (fid)
170 return fid; 141 return fid;
@@ -250,6 +221,45 @@ err_out:
250 return fid; 221 return fid;
251} 222}
252 223
224/**
225 * v9fs_fid_lookup - lookup for a fid, try to walk if not found
226 * @dentry: dentry to look for fid in
227 *
228 * Look for a fid in the specified dentry for the current user.
229 * If no fid is found, try to create one walking from a fid from the parent
230 * dentry (if it has one), or the root dentry. If the user haven't accessed
231 * the fs yet, attach now and walk from the root.
232 */
233
234struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
235{
236 uid_t uid;
237 int any, access;
238 struct v9fs_session_info *v9ses;
239
240 v9ses = v9fs_dentry2v9ses(dentry);
241 access = v9ses->flags & V9FS_ACCESS_MASK;
242 switch (access) {
243 case V9FS_ACCESS_SINGLE:
244 case V9FS_ACCESS_USER:
245 case V9FS_ACCESS_CLIENT:
246 uid = current_fsuid();
247 any = 0;
248 break;
249
250 case V9FS_ACCESS_ANY:
251 uid = v9ses->uid;
252 any = 1;
253 break;
254
255 default:
256 uid = ~0;
257 any = 0;
258 break;
259 }
260 return v9fs_fid_lookup_with_uid(dentry, uid, any);
261}
262
253struct p9_fid *v9fs_fid_clone(struct dentry *dentry) 263struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
254{ 264{
255 struct p9_fid *fid, *ret; 265 struct p9_fid *fid, *ret;
@@ -261,3 +271,50 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
261 ret = p9_client_walk(fid, 0, NULL, 1); 271 ret = p9_client_walk(fid, 0, NULL, 1);
262 return ret; 272 return ret;
263} 273}
274
275static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid)
276{
277 struct p9_fid *fid, *ret;
278
279 fid = v9fs_fid_lookup_with_uid(dentry, uid, 0);
280 if (IS_ERR(fid))
281 return fid;
282
283 ret = p9_client_walk(fid, 0, NULL, 1);
284 return ret;
285}
286
287struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
288{
289 int err, flags;
290 struct p9_fid *fid;
291 struct v9fs_session_info *v9ses;
292
293 v9ses = v9fs_dentry2v9ses(dentry);
294 fid = v9fs_fid_clone_with_uid(dentry, 0);
295 if (IS_ERR(fid))
296 goto error_out;
297 /*
298 * writeback fid will only be used to write back the
299 * dirty pages. We always request for the open fid in read-write
300 * mode so that a partial page write which results in a page
301 * read can work.
302 *
303 * we don't have a tsyncfs operation for older version
304 * of protocol. So make sure the write back fid is
305 * opened in O_SYNC mode.
306 */
307 if (!v9fs_proto_dotl(v9ses))
308 flags = O_RDWR | O_SYNC;
309 else
310 flags = O_RDWR;
311
312 err = p9_client_open(fid, flags);
313 if (err < 0) {
314 p9_client_clunk(fid);
315 fid = ERR_PTR(err);
316 goto error_out;
317 }
318error_out:
319 return fid;
320}
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6af996d..bb0b6e7f58fc 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@
19 * Boston, MA 02111-1301 USA 19 * Boston, MA 02111-1301 USA
20 * 20 *
21 */ 21 */
22 22#ifndef FS_9P_FID_H
23#define FS_9P_FID_H
23#include <linux/list.h> 24#include <linux/list.h>
24 25
25/** 26/**
@@ -45,3 +46,5 @@ struct v9fs_dentry {
45struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); 46struct p9_fid *v9fs_fid_lookup(struct dentry *dentry);
46struct p9_fid *v9fs_fid_clone(struct dentry *dentry); 47struct p9_fid *v9fs_fid_clone(struct dentry *dentry);
47int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); 48int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
49struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
50#endif
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd33ba83..c82b017f51f3 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@
39 39
40static DEFINE_SPINLOCK(v9fs_sessionlist_lock); 40static DEFINE_SPINLOCK(v9fs_sessionlist_lock);
41static LIST_HEAD(v9fs_sessionlist); 41static LIST_HEAD(v9fs_sessionlist);
42struct kmem_cache *v9fs_inode_cache;
42 43
43/* 44/*
44 * Option Parsing (code inspired by NFS code) 45 * Option Parsing (code inspired by NFS code)
@@ -55,7 +56,7 @@ enum {
55 /* Cache options */ 56 /* Cache options */
56 Opt_cache_loose, Opt_fscache, 57 Opt_cache_loose, Opt_fscache,
57 /* Access options */ 58 /* Access options */
58 Opt_access, 59 Opt_access, Opt_posixacl,
59 /* Error token */ 60 /* Error token */
60 Opt_err 61 Opt_err
61}; 62};
@@ -73,6 +74,7 @@ static const match_table_t tokens = {
73 {Opt_fscache, "fscache"}, 74 {Opt_fscache, "fscache"},
74 {Opt_cachetag, "cachetag=%s"}, 75 {Opt_cachetag, "cachetag=%s"},
75 {Opt_access, "access=%s"}, 76 {Opt_access, "access=%s"},
77 {Opt_posixacl, "posixacl"},
76 {Opt_err, NULL} 78 {Opt_err, NULL}
77}; 79};
78 80
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
194 else if (strcmp(s, "any") == 0) 196 else if (strcmp(s, "any") == 0)
195 v9ses->flags |= V9FS_ACCESS_ANY; 197 v9ses->flags |= V9FS_ACCESS_ANY;
196 else if (strcmp(s, "client") == 0) { 198 else if (strcmp(s, "client") == 0) {
197#ifdef CONFIG_9P_FS_POSIX_ACL
198 v9ses->flags |= V9FS_ACCESS_CLIENT; 199 v9ses->flags |= V9FS_ACCESS_CLIENT;
199#else
200 P9_DPRINTK(P9_DEBUG_ERROR,
201 "access=client option not supported\n");
202 kfree(s);
203 ret = -EINVAL;
204 goto free_and_return;
205#endif
206 } else { 200 } else {
207 v9ses->flags |= V9FS_ACCESS_SINGLE; 201 v9ses->flags |= V9FS_ACCESS_SINGLE;
208 v9ses->uid = simple_strtoul(s, &e, 10); 202 v9ses->uid = simple_strtoul(s, &e, 10);
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
212 kfree(s); 206 kfree(s);
213 break; 207 break;
214 208
209 case Opt_posixacl:
210#ifdef CONFIG_9P_FS_POSIX_ACL
211 v9ses->flags |= V9FS_POSIX_ACL;
212#else
213 P9_DPRINTK(P9_DEBUG_ERROR,
214 "Not defined CONFIG_9P_FS_POSIX_ACL. "
215 "Ignoring posixacl option\n");
216#endif
217 break;
218
215 default: 219 default:
216 continue; 220 continue;
217 } 221 }
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
260 list_add(&v9ses->slist, &v9fs_sessionlist); 264 list_add(&v9ses->slist, &v9fs_sessionlist);
261 spin_unlock(&v9fs_sessionlist_lock); 265 spin_unlock(&v9fs_sessionlist_lock);
262 266
263 v9ses->flags = V9FS_ACCESS_USER;
264 strcpy(v9ses->uname, V9FS_DEFUSER); 267 strcpy(v9ses->uname, V9FS_DEFUSER);
265 strcpy(v9ses->aname, V9FS_DEFANAME); 268 strcpy(v9ses->aname, V9FS_DEFANAME);
266 v9ses->uid = ~0; 269 v9ses->uid = ~0;
267 v9ses->dfltuid = V9FS_DEFUID; 270 v9ses->dfltuid = V9FS_DEFUID;
268 v9ses->dfltgid = V9FS_DEFGID; 271 v9ses->dfltgid = V9FS_DEFGID;
269 272
270 rc = v9fs_parse_options(v9ses, data);
271 if (rc < 0) {
272 retval = rc;
273 goto error;
274 }
275
276 v9ses->clnt = p9_client_create(dev_name, data); 273 v9ses->clnt = p9_client_create(dev_name, data);
277 if (IS_ERR(v9ses->clnt)) { 274 if (IS_ERR(v9ses->clnt)) {
278 retval = PTR_ERR(v9ses->clnt); 275 retval = PTR_ERR(v9ses->clnt);
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
281 goto error; 278 goto error;
282 } 279 }
283 280
284 if (p9_is_proto_dotl(v9ses->clnt)) 281 v9ses->flags = V9FS_ACCESS_USER;
282
283 if (p9_is_proto_dotl(v9ses->clnt)) {
284 v9ses->flags = V9FS_ACCESS_CLIENT;
285 v9ses->flags |= V9FS_PROTO_2000L; 285 v9ses->flags |= V9FS_PROTO_2000L;
286 else if (p9_is_proto_dotu(v9ses->clnt)) 286 } else if (p9_is_proto_dotu(v9ses->clnt)) {
287 v9ses->flags |= V9FS_PROTO_2000U; 287 v9ses->flags |= V9FS_PROTO_2000U;
288 }
289
290 rc = v9fs_parse_options(v9ses, data);
291 if (rc < 0) {
292 retval = rc;
293 goto error;
294 }
288 295
289 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; 296 v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
290 297
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
306 v9ses->flags |= V9FS_ACCESS_ANY; 313 v9ses->flags |= V9FS_ACCESS_ANY;
307 v9ses->uid = ~0; 314 v9ses->uid = ~0;
308 } 315 }
316 if (!v9fs_proto_dotl(v9ses) ||
317 !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
318 /*
319 * We support ACL checks on client only if the protocol is
320 * 9P2000.L and access is V9FS_ACCESS_CLIENT.
321 */
322 v9ses->flags &= ~V9FS_ACL_MASK;
323 }
309 324
310 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, 325 fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0,
311 v9ses->aname); 326 v9ses->aname);
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void)
467 kobject_put(v9fs_kobj); 482 kobject_put(v9fs_kobj);
468} 483}
469 484
485static void v9fs_inode_init_once(void *foo)
486{
487 struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
488#ifdef CONFIG_9P_FSCACHE
489 v9inode->fscache = NULL;
490 v9inode->fscache_key = NULL;
491#endif
492 inode_init_once(&v9inode->vfs_inode);
493}
494
495/**
496 * v9fs_init_inode_cache - initialize a cache for 9P
497 * Returns 0 on success.
498 */
499static int v9fs_init_inode_cache(void)
500{
501 v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache",
502 sizeof(struct v9fs_inode),
503 0, (SLAB_RECLAIM_ACCOUNT|
504 SLAB_MEM_SPREAD),
505 v9fs_inode_init_once);
506 if (!v9fs_inode_cache)
507 return -ENOMEM;
508
509 return 0;
510}
511
512/**
513 * v9fs_destroy_inode_cache - destroy the cache of 9P inode
514 *
515 */
516static void v9fs_destroy_inode_cache(void)
517{
518 kmem_cache_destroy(v9fs_inode_cache);
519}
520
521static int v9fs_cache_register(void)
522{
523 int ret;
524 ret = v9fs_init_inode_cache();
525 if (ret < 0)
526 return ret;
527#ifdef CONFIG_9P_FSCACHE
528 return fscache_register_netfs(&v9fs_cache_netfs);
529#else
530 return ret;
531#endif
532}
533
534static void v9fs_cache_unregister(void)
535{
536 v9fs_destroy_inode_cache();
537#ifdef CONFIG_9P_FSCACHE
538 fscache_unregister_netfs(&v9fs_cache_netfs);
539#endif
540}
541
470/** 542/**
471 * init_v9fs - Initialize module 543 * init_v9fs - Initialize module
472 * 544 *
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d8864f0d..9665c2b840e6 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_H
24#define FS_9P_V9FS_H
25
23#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
24 27
25/** 28/**
@@ -28,8 +31,10 @@
28 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions 31 * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions
29 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy 32 * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
30 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) 33 * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
34 * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client.
31 * @V9FS_ACCESS_ANY: use a single attach for all users 35 * @V9FS_ACCESS_ANY: use a single attach for all users
32 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options 36 * @V9FS_ACCESS_MASK: bit mask of different ACCESS options
37 * @V9FS_POSIX_ACL: POSIX ACLs are enforced
33 * 38 *
34 * Session flags reflect options selected by users at mount time 39 * Session flags reflect options selected by users at mount time
35 */ 40 */
@@ -37,13 +42,15 @@
37 V9FS_ACCESS_USER | \ 42 V9FS_ACCESS_USER | \
38 V9FS_ACCESS_CLIENT) 43 V9FS_ACCESS_CLIENT)
39#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY 44#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
45#define V9FS_ACL_MASK V9FS_POSIX_ACL
40 46
41enum p9_session_flags { 47enum p9_session_flags {
42 V9FS_PROTO_2000U = 0x01, 48 V9FS_PROTO_2000U = 0x01,
43 V9FS_PROTO_2000L = 0x02, 49 V9FS_PROTO_2000L = 0x02,
44 V9FS_ACCESS_SINGLE = 0x04, 50 V9FS_ACCESS_SINGLE = 0x04,
45 V9FS_ACCESS_USER = 0x08, 51 V9FS_ACCESS_USER = 0x08,
46 V9FS_ACCESS_CLIENT = 0x10 52 V9FS_ACCESS_CLIENT = 0x10,
53 V9FS_POSIX_ACL = 0x20
47}; 54};
48 55
49/* possible values of ->cache */ 56/* possible values of ->cache */
@@ -109,8 +116,29 @@ struct v9fs_session_info {
109 struct list_head slist; /* list of sessions registered with v9fs */ 116 struct list_head slist; /* list of sessions registered with v9fs */
110 struct backing_dev_info bdi; 117 struct backing_dev_info bdi;
111 struct rw_semaphore rename_sem; 118 struct rw_semaphore rename_sem;
119 struct p9_fid *root_fid; /* Used for file system sync */
120};
121
122/* cache_validity flags */
123#define V9FS_INO_INVALID_ATTR 0x01
124
125struct v9fs_inode {
126#ifdef CONFIG_9P_FSCACHE
127 spinlock_t fscache_lock;
128 struct fscache_cookie *fscache;
129 struct p9_qid *fscache_key;
130#endif
131 unsigned int cache_validity;
132 struct p9_fid *writeback_fid;
133 struct mutex v_mutex;
134 struct inode vfs_inode;
112}; 135};
113 136
137static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
138{
139 return container_of(inode, struct v9fs_inode, vfs_inode);
140}
141
114struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, 142struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
115 char *); 143 char *);
116extern void v9fs_session_close(struct v9fs_session_info *v9ses); 144extern void v9fs_session_close(struct v9fs_session_info *v9ses);
@@ -124,16 +152,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
124 struct inode *new_dir, struct dentry *new_dentry); 152 struct inode *new_dir, struct dentry *new_dentry);
125extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, 153extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
126 void *p); 154 void *p);
127extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses, 155extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
128 struct p9_fid *fid, 156 struct p9_fid *fid,
129 struct super_block *sb); 157 struct super_block *sb);
130
131extern const struct inode_operations v9fs_dir_inode_operations_dotl; 158extern const struct inode_operations v9fs_dir_inode_operations_dotl;
132extern const struct inode_operations v9fs_file_inode_operations_dotl; 159extern const struct inode_operations v9fs_file_inode_operations_dotl;
133extern const struct inode_operations v9fs_symlink_inode_operations_dotl; 160extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
134extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses, 161extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
135 struct p9_fid *fid, 162 struct p9_fid *fid,
136 struct super_block *sb); 163 struct super_block *sb);
137 164
138/* other default globals */ 165/* other default globals */
139#define V9FS_PORT 564 166#define V9FS_PORT 564
@@ -147,6 +174,11 @@ static inline struct v9fs_session_info *v9fs_inode2v9ses(struct inode *inode)
147 return (inode->i_sb->s_fs_info); 174 return (inode->i_sb->s_fs_info);
148} 175}
149 176
177static inline struct v9fs_session_info *v9fs_dentry2v9ses(struct dentry *dentry)
178{
179 return dentry->d_sb->s_fs_info;
180}
181
150static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses) 182static inline int v9fs_proto_dotu(struct v9fs_session_info *v9ses)
151{ 183{
152 return v9ses->flags & V9FS_PROTO_2000U; 184 return v9ses->flags & V9FS_PROTO_2000U;
@@ -158,7 +190,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
158} 190}
159 191
160/** 192/**
161 * v9fs_inode_from_fid - Helper routine to populate an inode by 193 * v9fs_get_inode_from_fid - Helper routine to populate an inode by
162 * issuing a attribute request 194 * issuing a attribute request
163 * @v9ses: session information 195 * @v9ses: session information
164 * @fid: fid to issue attribute request for 196 * @fid: fid to issue attribute request for
@@ -166,11 +198,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses)
166 * 198 *
167 */ 199 */
168static inline struct inode * 200static inline struct inode *
169v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 201v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
170 struct super_block *sb) 202 struct super_block *sb)
171{ 203{
172 if (v9fs_proto_dotl(v9ses)) 204 if (v9fs_proto_dotl(v9ses))
173 return v9fs_inode_dotl(v9ses, fid, sb); 205 return v9fs_inode_from_fid_dotl(v9ses, fid, sb);
174 else 206 else
175 return v9fs_inode(v9ses, fid, sb); 207 return v9fs_inode_from_fid(v9ses, fid, sb);
176} 208}
209#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e597ec..4014160903a9 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@
20 * Boston, MA 02111-1301 USA 20 * Boston, MA 02111-1301 USA
21 * 21 *
22 */ 22 */
23#ifndef FS_9P_V9FS_VFS_H
24#define FS_9P_V9FS_VFS_H
23 25
24/* plan9 semantics are that created files are implicitly opened. 26/* plan9 semantics are that created files are implicitly opened.
25 * But linux semantics are that you call create, then open. 27 * But linux semantics are that you call create, then open.
@@ -36,6 +38,7 @@
36 * unlink calls remove, which is an implicit clunk. So we have to track 38 * unlink calls remove, which is an implicit clunk. So we have to track
37 * that kind of thing so that we don't try to clunk a dead fid. 39 * that kind of thing so that we don't try to clunk a dead fid.
38 */ 40 */
41#define P9_LOCK_TIMEOUT (30*HZ)
39 42
40extern struct file_system_type v9fs_fs_type; 43extern struct file_system_type v9fs_fs_type;
41extern const struct address_space_operations v9fs_addr_operations; 44extern const struct address_space_operations v9fs_addr_operations;
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations;
45extern const struct file_operations v9fs_dir_operations_dotl; 48extern const struct file_operations v9fs_dir_operations_dotl;
46extern const struct dentry_operations v9fs_dentry_operations; 49extern const struct dentry_operations v9fs_dentry_operations;
47extern const struct dentry_operations v9fs_cached_dentry_operations; 50extern const struct dentry_operations v9fs_cached_dentry_operations;
51extern const struct file_operations v9fs_cached_file_operations;
52extern const struct file_operations v9fs_cached_file_operations_dotl;
53extern struct kmem_cache *v9fs_inode_cache;
48 54
49#ifdef CONFIG_9P_FSCACHE
50struct inode *v9fs_alloc_inode(struct super_block *sb); 55struct inode *v9fs_alloc_inode(struct super_block *sb);
51void v9fs_destroy_inode(struct inode *inode); 56void v9fs_destroy_inode(struct inode *inode);
52#endif
53
54struct inode *v9fs_get_inode(struct super_block *sb, int mode); 57struct inode *v9fs_get_inode(struct super_block *sb, int mode);
58int v9fs_init_inode(struct v9fs_session_info *v9ses,
59 struct inode *inode, int mode);
55void v9fs_evict_inode(struct inode *inode); 60void v9fs_evict_inode(struct inode *inode);
56ino_t v9fs_qid2ino(struct p9_qid *qid); 61ino_t v9fs_qid2ino(struct p9_qid *qid);
57void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); 62void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
62int v9fs_uflags2omode(int uflags, int extended); 67int v9fs_uflags2omode(int uflags, int extended);
63 68
64ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); 69ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
65void v9fs_blank_wstat(struct p9_wstat *wstat); 71void v9fs_blank_wstat(struct p9_wstat *wstat);
66int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); 72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
67int v9fs_file_fsync_dotl(struct file *filp, int datasync); 73int v9fs_file_fsync_dotl(struct file *filp, int datasync);
68 74ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
69#define P9_LOCK_TIMEOUT (30*HZ) 75 const char __user *, size_t, loff_t *, int);
76int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
77int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode);
78static inline void v9fs_invalidate_inode_attr(struct inode *inode)
79{
80 struct v9fs_inode *v9inode;
81 v9inode = V9FS_I(inode);
82 v9inode->cache_validity |= V9FS_INO_INVALID_ATTR;
83 return;
84}
85#endif
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e3863e..2524e4cbb8ea 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@
39#include "v9fs.h" 39#include "v9fs.h"
40#include "v9fs_vfs.h" 40#include "v9fs_vfs.h"
41#include "cache.h" 41#include "cache.h"
42#include "fid.h"
42 43
43/** 44/**
44 * v9fs_vfs_readpage - read an entire page in from 9P 45 * v9fs_fid_readpage - read an entire page in from 9P
45 * 46 *
46 * @filp: file being read 47 * @fid: fid being read
47 * @page: structure to page 48 * @page: structure to page
48 * 49 *
49 */ 50 */
50 51static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
51static int v9fs_vfs_readpage(struct file *filp, struct page *page)
52{ 52{
53 int retval; 53 int retval;
54 loff_t offset; 54 loff_t offset;
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
67 buffer = kmap(page); 67 buffer = kmap(page);
68 offset = page_offset(page); 68 offset = page_offset(page);
69 69
70 retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); 70 retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset);
71 if (retval < 0) { 71 if (retval < 0) {
72 v9fs_uncache_page(inode, page); 72 v9fs_uncache_page(inode, page);
73 goto done; 73 goto done;
@@ -87,6 +87,19 @@ done:
87} 87}
88 88
89/** 89/**
90 * v9fs_vfs_readpage - read an entire page in from 9P
91 *
92 * @filp: file being read
93 * @page: structure to page
94 *
95 */
96
97static int v9fs_vfs_readpage(struct file *filp, struct page *page)
98{
99 return v9fs_fid_readpage(filp->private_data, page);
100}
101
102/**
90 * v9fs_vfs_readpages - read a set of pages from 9P 103 * v9fs_vfs_readpages - read a set of pages from 9P
91 * 104 *
92 * @filp: file being read 105 * @filp: file being read
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
124{ 137{
125 if (PagePrivate(page)) 138 if (PagePrivate(page))
126 return 0; 139 return 0;
127
128 return v9fs_fscache_release_page(page, gfp); 140 return v9fs_fscache_release_page(page, gfp);
129} 141}
130 142
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
137 149
138static void v9fs_invalidate_page(struct page *page, unsigned long offset) 150static void v9fs_invalidate_page(struct page *page, unsigned long offset)
139{ 151{
152 /*
153 * If called with zero offset, we should release
154 * the private state assocated with the page
155 */
140 if (offset == 0) 156 if (offset == 0)
141 v9fs_fscache_invalidate_page(page); 157 v9fs_fscache_invalidate_page(page);
142} 158}
143 159
160static int v9fs_vfs_writepage_locked(struct page *page)
161{
162 char *buffer;
163 int retval, len;
164 loff_t offset, size;
165 mm_segment_t old_fs;
166 struct v9fs_inode *v9inode;
167 struct inode *inode = page->mapping->host;
168
169 v9inode = V9FS_I(inode);
170 size = i_size_read(inode);
171 if (page->index == size >> PAGE_CACHE_SHIFT)
172 len = size & ~PAGE_CACHE_MASK;
173 else
174 len = PAGE_CACHE_SIZE;
175
176 set_page_writeback(page);
177
178 buffer = kmap(page);
179 offset = page_offset(page);
180
181 old_fs = get_fs();
182 set_fs(get_ds());
183 /* We should have writeback_fid always set */
184 BUG_ON(!v9inode->writeback_fid);
185
186 retval = v9fs_file_write_internal(inode,
187 v9inode->writeback_fid,
188 (__force const char __user *)buffer,
189 len, &offset, 0);
190 if (retval > 0)
191 retval = 0;
192
193 set_fs(old_fs);
194 kunmap(page);
195 end_page_writeback(page);
196 return retval;
197}
198
199static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
200{
201 int retval;
202
203 retval = v9fs_vfs_writepage_locked(page);
204 if (retval < 0) {
205 if (retval == -EAGAIN) {
206 redirty_page_for_writepage(wbc, page);
207 retval = 0;
208 } else {
209 SetPageError(page);
210 mapping_set_error(page->mapping, retval);
211 }
212 } else
213 retval = 0;
214
215 unlock_page(page);
216 return retval;
217}
218
144/** 219/**
145 * v9fs_launder_page - Writeback a dirty page 220 * v9fs_launder_page - Writeback a dirty page
146 * Since the writes go directly to the server, we simply return a 0
147 * here to indicate success.
148 *
149 * Returns 0 on success. 221 * Returns 0 on success.
150 */ 222 */
151 223
152static int v9fs_launder_page(struct page *page) 224static int v9fs_launder_page(struct page *page)
153{ 225{
226 int retval;
227 struct inode *inode = page->mapping->host;
228
229 v9fs_fscache_wait_on_page_write(inode, page);
230 if (clear_page_dirty_for_io(page)) {
231 retval = v9fs_vfs_writepage_locked(page);
232 if (retval)
233 return retval;
234 }
154 return 0; 235 return 0;
155} 236}
156 237
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page)
173 * with an error. 254 * with an error.
174 * 255 *
175 */ 256 */
176ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, 257static ssize_t
177 loff_t pos, unsigned long nr_segs) 258v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
259 loff_t pos, unsigned long nr_segs)
178{ 260{
261 /*
262 * FIXME
263 * Now that we do caching with cache mode enabled, We need
264 * to support direct IO
265 */
179 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " 266 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
180 "off/no(%lld/%lu) EINVAL\n", 267 "off/no(%lld/%lu) EINVAL\n",
181 iocb->ki_filp->f_path.dentry->d_name.name, 268 iocb->ki_filp->f_path.dentry->d_name.name,
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
183 270
184 return -EINVAL; 271 return -EINVAL;
185} 272}
273
274static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
275 loff_t pos, unsigned len, unsigned flags,
276 struct page **pagep, void **fsdata)
277{
278 int retval = 0;
279 struct page *page;
280 struct v9fs_inode *v9inode;
281 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
282 struct inode *inode = mapping->host;
283
284 v9inode = V9FS_I(inode);
285start:
286 page = grab_cache_page_write_begin(mapping, index, flags);
287 if (!page) {
288 retval = -ENOMEM;
289 goto out;
290 }
291 BUG_ON(!v9inode->writeback_fid);
292 if (PageUptodate(page))
293 goto out;
294
295 if (len == PAGE_CACHE_SIZE)
296 goto out;
297
298 retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
299 page_cache_release(page);
300 if (!retval)
301 goto start;
302out:
303 *pagep = page;
304 return retval;
305}
306
307static int v9fs_write_end(struct file *filp, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned copied,
309 struct page *page, void *fsdata)
310{
311 loff_t last_pos = pos + copied;
312 struct inode *inode = page->mapping->host;
313
314 if (unlikely(copied < len)) {
315 /*
316 * zero out the rest of the area
317 */
318 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
319
320 zero_user(page, from + copied, len - copied);
321 flush_dcache_page(page);
322 }
323
324 if (!PageUptodate(page))
325 SetPageUptodate(page);
326 /*
327 * No need to use i_size_read() here, the i_size
328 * cannot change under us because we hold the i_mutex.
329 */
330 if (last_pos > inode->i_size) {
331 inode_add_bytes(inode, last_pos - inode->i_size);
332 i_size_write(inode, last_pos);
333 }
334 set_page_dirty(page);
335 unlock_page(page);
336 page_cache_release(page);
337
338 return copied;
339}
340
341
186const struct address_space_operations v9fs_addr_operations = { 342const struct address_space_operations v9fs_addr_operations = {
187 .readpage = v9fs_vfs_readpage, 343 .readpage = v9fs_vfs_readpage,
188 .readpages = v9fs_vfs_readpages, 344 .readpages = v9fs_vfs_readpages,
189 .releasepage = v9fs_release_page, 345 .set_page_dirty = __set_page_dirty_nobuffers,
190 .invalidatepage = v9fs_invalidate_page, 346 .writepage = v9fs_vfs_writepage,
191 .launder_page = v9fs_launder_page, 347 .write_begin = v9fs_write_begin,
192 .direct_IO = v9fs_direct_IO, 348 .write_end = v9fs_write_end,
349 .releasepage = v9fs_release_page,
350 .invalidatepage = v9fs_invalidate_page,
351 .launder_page = v9fs_launder_page,
352 .direct_IO = v9fs_direct_IO,
193}; 353};
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 233b7d4ffe5e..b6a3b9f7fe4d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry)
63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0 63 * v9fs_cached_dentry_delete - called when dentry refcount equals 0
64 * @dentry: dentry in question 64 * @dentry: dentry in question
65 * 65 *
66 * Only return 1 if our inode is invalid. Only non-synthetic files
67 * (ones without mtime == 0) should be calling this function.
68 *
69 */ 66 */
70
71static int v9fs_cached_dentry_delete(const struct dentry *dentry) 67static int v9fs_cached_dentry_delete(const struct dentry *dentry)
72{ 68{
73 struct inode *inode = dentry->d_inode; 69 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n",
74 P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, 70 dentry->d_name.name, dentry);
75 dentry);
76 71
77 if(!inode) 72 /* Don't cache negative dentries */
73 if (!dentry->d_inode)
78 return 1; 74 return 1;
79
80 return 0; 75 return 0;
81} 76}
82 77
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry)
105 } 100 }
106} 101}
107 102
103static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
104{
105 struct p9_fid *fid;
106 struct inode *inode;
107 struct v9fs_inode *v9inode;
108
109 if (nd->flags & LOOKUP_RCU)
110 return -ECHILD;
111
112 inode = dentry->d_inode;
113 if (!inode)
114 goto out_valid;
115
116 v9inode = V9FS_I(inode);
117 if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
118 int retval;
119 struct v9fs_session_info *v9ses;
120 fid = v9fs_fid_lookup(dentry);
121 if (IS_ERR(fid))
122 return PTR_ERR(fid);
123
124 v9ses = v9fs_inode2v9ses(inode);
125 if (v9fs_proto_dotl(v9ses))
126 retval = v9fs_refresh_inode_dotl(fid, inode);
127 else
128 retval = v9fs_refresh_inode(fid, inode);
129 if (retval <= 0)
130 return retval;
131 }
132out_valid:
133 return 1;
134}
135
108const struct dentry_operations v9fs_cached_dentry_operations = { 136const struct dentry_operations v9fs_cached_dentry_operations = {
137 .d_revalidate = v9fs_lookup_revalidate,
109 .d_delete = v9fs_cached_dentry_delete, 138 .d_delete = v9fs_cached_dentry_delete,
110 .d_release = v9fs_dentry_release, 139 .d_release = v9fs_dentry_release,
111}; 140};
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b84ebe8cefed..9c2bdda5cd9d 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
295 P9_DPRINTK(P9_DEBUG_VFS, 295 P9_DPRINTK(P9_DEBUG_VFS,
296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n", 296 "v9fs_dir_release: inode: %p filp: %p fid: %d\n",
297 inode, filp, fid ? fid->fid : -1); 297 inode, filp, fid ? fid->fid : -1);
298 filemap_write_and_wait(inode->i_mapping);
299 if (fid) 298 if (fid)
300 p9_client_clunk(fid); 299 p9_client_clunk(fid);
301 return 0; 300 return 0;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 240c30674396..ffed55817f0c 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -44,8 +44,7 @@
44#include "fid.h" 44#include "fid.h"
45#include "cache.h" 45#include "cache.h"
46 46
47static const struct file_operations v9fs_cached_file_operations; 47static const struct vm_operations_struct v9fs_file_vm_ops;
48static const struct file_operations v9fs_cached_file_operations_dotl;
49 48
50/** 49/**
51 * v9fs_file_open - open a file (or directory) 50 * v9fs_file_open - open a file (or directory)
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl;
57int v9fs_file_open(struct inode *inode, struct file *file) 56int v9fs_file_open(struct inode *inode, struct file *file)
58{ 57{
59 int err; 58 int err;
59 struct v9fs_inode *v9inode;
60 struct v9fs_session_info *v9ses; 60 struct v9fs_session_info *v9ses;
61 struct p9_fid *fid; 61 struct p9_fid *fid;
62 int omode; 62 int omode;
63 63
64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); 64 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
65 v9inode = V9FS_I(inode);
65 v9ses = v9fs_inode2v9ses(inode); 66 v9ses = v9fs_inode2v9ses(inode);
66 if (v9fs_proto_dotl(v9ses)) 67 if (v9fs_proto_dotl(v9ses))
67 omode = file->f_flags; 68 omode = file->f_flags;
@@ -89,20 +90,34 @@ int v9fs_file_open(struct inode *inode, struct file *file)
89 } 90 }
90 91
91 file->private_data = fid; 92 file->private_data = fid;
92 if ((fid->qid.version) && (v9ses->cache)) { 93 mutex_lock(&v9inode->v_mutex);
93 P9_DPRINTK(P9_DEBUG_VFS, "cached"); 94 if (v9ses->cache && !v9inode->writeback_fid &&
94 /* enable cached file options */ 95 ((file->f_flags & O_ACCMODE) != O_RDONLY)) {
95 if(file->f_op == &v9fs_file_operations) 96 /*
96 file->f_op = &v9fs_cached_file_operations; 97 * clone a fid and add it to writeback_fid
97 else if (file->f_op == &v9fs_file_operations_dotl) 98 * we do it during open time instead of
98 file->f_op = &v9fs_cached_file_operations_dotl; 99 * page dirty time via write_begin/page_mkwrite
99 100 * because we want write after unlink usecase
101 * to work.
102 */
103 fid = v9fs_writeback_fid(file->f_path.dentry);
104 if (IS_ERR(fid)) {
105 err = PTR_ERR(fid);
106 mutex_unlock(&v9inode->v_mutex);
107 goto out_error;
108 }
109 v9inode->writeback_fid = (void *) fid;
110 }
111 mutex_unlock(&v9inode->v_mutex);
100#ifdef CONFIG_9P_FSCACHE 112#ifdef CONFIG_9P_FSCACHE
113 if (v9ses->cache)
101 v9fs_cache_inode_set_cookie(inode, file); 114 v9fs_cache_inode_set_cookie(inode, file);
102#endif 115#endif
103 }
104
105 return 0; 116 return 0;
117out_error:
118 p9_client_clunk(file->private_data);
119 file->private_data = NULL;
120 return err;
106} 121}
107 122
108/** 123/**
@@ -335,25 +350,22 @@ out_err:
335} 350}
336 351
337/** 352/**
338 * v9fs_file_readn - read from a file 353 * v9fs_fid_readn - read from a fid
339 * @filp: file pointer to read 354 * @fid: fid to read
340 * @data: data buffer to read data into 355 * @data: data buffer to read data into
341 * @udata: user data buffer to read data into 356 * @udata: user data buffer to read data into
342 * @count: size of buffer 357 * @count: size of buffer
343 * @offset: offset at which to read data 358 * @offset: offset at which to read data
344 * 359 *
345 */ 360 */
346
347ssize_t 361ssize_t
348v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, 362v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count,
349 u64 offset) 363 u64 offset)
350{ 364{
351 int n, total, size; 365 int n, total, size;
352 struct p9_fid *fid = filp->private_data;
353 366
354 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, 367 P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
355 (long long unsigned) offset, count); 368 (long long unsigned) offset, count);
356
357 n = 0; 369 n = 0;
358 total = 0; 370 total = 0;
359 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; 371 size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ;
@@ -379,6 +391,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
379} 391}
380 392
381/** 393/**
394 * v9fs_file_readn - read from a file
395 * @filp: file pointer to read
396 * @data: data buffer to read data into
397 * @udata: user data buffer to read data into
398 * @count: size of buffer
399 * @offset: offset at which to read data
400 *
401 */
402ssize_t
403v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
404 u64 offset)
405{
406 return v9fs_fid_readn(filp->private_data, data, udata, count, offset);
407}
408
409/**
382 * v9fs_file_read - read from a file 410 * v9fs_file_read - read from a file
383 * @filp: file pointer to read 411 * @filp: file pointer to read
384 * @udata: user data buffer to read data into 412 * @udata: user data buffer to read data into
@@ -410,45 +438,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
410 return ret; 438 return ret;
411} 439}
412 440
413/** 441ssize_t
414 * v9fs_file_write - write to a file 442v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid,
415 * @filp: file pointer to write 443 const char __user *data, size_t count,
416 * @data: data buffer to write data from 444 loff_t *offset, int invalidate)
417 * @count: size of buffer
418 * @offset: offset at which to write data
419 *
420 */
421
422static ssize_t
423v9fs_file_write(struct file *filp, const char __user * data,
424 size_t count, loff_t * offset)
425{ 445{
426 ssize_t retval;
427 size_t total = 0;
428 int n; 446 int n;
429 struct p9_fid *fid; 447 loff_t i_size;
448 size_t total = 0;
430 struct p9_client *clnt; 449 struct p9_client *clnt;
431 struct inode *inode = filp->f_path.dentry->d_inode;
432 loff_t origin = *offset; 450 loff_t origin = *offset;
433 unsigned long pg_start, pg_end; 451 unsigned long pg_start, pg_end;
434 452
435 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, 453 P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
436 (int)count, (int)*offset); 454 (int)count, (int)*offset);
437 455
438 fid = filp->private_data;
439 clnt = fid->clnt; 456 clnt = fid->clnt;
440
441 retval = generic_write_checks(filp, &origin, &count, 0);
442 if (retval)
443 goto out;
444
445 retval = -EINVAL;
446 if ((ssize_t) count < 0)
447 goto out;
448 retval = 0;
449 if (!count)
450 goto out;
451
452 do { 457 do {
453 n = p9_client_write(fid, NULL, data+total, origin+total, count); 458 n = p9_client_write(fid, NULL, data+total, origin+total, count);
454 if (n <= 0) 459 if (n <= 0)
@@ -457,25 +462,63 @@ v9fs_file_write(struct file *filp, const char __user * data,
457 total += n; 462 total += n;
458 } while (count > 0); 463 } while (count > 0);
459 464
460 if (total > 0) { 465 if (invalidate && (total > 0)) {
461 pg_start = origin >> PAGE_CACHE_SHIFT; 466 pg_start = origin >> PAGE_CACHE_SHIFT;
462 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; 467 pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT;
463 if (inode->i_mapping && inode->i_mapping->nrpages) 468 if (inode->i_mapping && inode->i_mapping->nrpages)
464 invalidate_inode_pages2_range(inode->i_mapping, 469 invalidate_inode_pages2_range(inode->i_mapping,
465 pg_start, pg_end); 470 pg_start, pg_end);
466 *offset += total; 471 *offset += total;
467 i_size_write(inode, i_size_read(inode) + total); 472 i_size = i_size_read(inode);
468 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 473 if (*offset > i_size) {
474 inode_add_bytes(inode, *offset - i_size);
475 i_size_write(inode, *offset);
476 }
469 } 477 }
470
471 if (n < 0) 478 if (n < 0)
472 retval = n; 479 return n;
473 else 480
474 retval = total; 481 return total;
482}
483
484/**
485 * v9fs_file_write - write to a file
486 * @filp: file pointer to write
487 * @data: data buffer to write data from
488 * @count: size of buffer
489 * @offset: offset at which to write data
490 *
491 */
492static ssize_t
493v9fs_file_write(struct file *filp, const char __user * data,
494 size_t count, loff_t *offset)
495{
496 ssize_t retval = 0;
497 loff_t origin = *offset;
498
499
500 retval = generic_write_checks(filp, &origin, &count, 0);
501 if (retval)
502 goto out;
503
504 retval = -EINVAL;
505 if ((ssize_t) count < 0)
506 goto out;
507 retval = 0;
508 if (!count)
509 goto out;
510
511 retval = v9fs_file_write_internal(filp->f_path.dentry->d_inode,
512 filp->private_data,
513 data, count, &origin, 1);
514 /* update offset on successful write */
515 if (retval > 0)
516 *offset = origin;
475out: 517out:
476 return retval; 518 return retval;
477} 519}
478 520
521
479static int v9fs_file_fsync(struct file *filp, int datasync) 522static int v9fs_file_fsync(struct file *filp, int datasync)
480{ 523{
481 struct p9_fid *fid; 524 struct p9_fid *fid;
@@ -505,28 +548,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync)
505 return retval; 548 return retval;
506} 549}
507 550
508static const struct file_operations v9fs_cached_file_operations = { 551static int
552v9fs_file_mmap(struct file *file, struct vm_area_struct *vma)
553{
554 int retval;
555
556 retval = generic_file_mmap(file, vma);
557 if (!retval)
558 vma->vm_ops = &v9fs_file_vm_ops;
559
560 return retval;
561}
562
563static int
564v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
565{
566 struct v9fs_inode *v9inode;
567 struct page *page = vmf->page;
568 struct file *filp = vma->vm_file;
569 struct inode *inode = filp->f_path.dentry->d_inode;
570
571
572 P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n",
573 page, (unsigned long)filp->private_data);
574
575 v9inode = V9FS_I(inode);
576 /* make sure the cache has finished storing the page */
577 v9fs_fscache_wait_on_page_write(inode, page);
578 BUG_ON(!v9inode->writeback_fid);
579 lock_page(page);
580 if (page->mapping != inode->i_mapping)
581 goto out_unlock;
582
583 return VM_FAULT_LOCKED;
584out_unlock:
585 unlock_page(page);
586 return VM_FAULT_NOPAGE;
587}
588
589static ssize_t
590v9fs_direct_read(struct file *filp, char __user *udata, size_t count,
591 loff_t *offsetp)
592{
593 loff_t size, offset;
594 struct inode *inode;
595 struct address_space *mapping;
596
597 offset = *offsetp;
598 mapping = filp->f_mapping;
599 inode = mapping->host;
600 if (!count)
601 return 0;
602 size = i_size_read(inode);
603 if (offset < size)
604 filemap_write_and_wait_range(mapping, offset,
605 offset + count - 1);
606
607 return v9fs_file_read(filp, udata, count, offsetp);
608}
609
610/**
611 * v9fs_cached_file_read - read from a file
612 * @filp: file pointer to read
613 * @udata: user data buffer to read data into
614 * @count: size of buffer
615 * @offset: offset at which to read data
616 *
617 */
618static ssize_t
619v9fs_cached_file_read(struct file *filp, char __user *data, size_t count,
620 loff_t *offset)
621{
622 if (filp->f_flags & O_DIRECT)
623 return v9fs_direct_read(filp, data, count, offset);
624 return do_sync_read(filp, data, count, offset);
625}
626
627static ssize_t
628v9fs_direct_write(struct file *filp, const char __user * data,
629 size_t count, loff_t *offsetp)
630{
631 loff_t offset;
632 ssize_t retval;
633 struct inode *inode;
634 struct address_space *mapping;
635
636 offset = *offsetp;
637 mapping = filp->f_mapping;
638 inode = mapping->host;
639 if (!count)
640 return 0;
641
642 mutex_lock(&inode->i_mutex);
643 retval = filemap_write_and_wait_range(mapping, offset,
644 offset + count - 1);
645 if (retval)
646 goto err_out;
647 /*
648 * After a write we want buffered reads to be sure to go to disk to get
649 * the new data. We invalidate clean cached page from the region we're
650 * about to write. We do this *before* the write so that if we fail
651 * here we fall back to buffered write
652 */
653 if (mapping->nrpages) {
654 pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT;
655 pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT;
656
657 retval = invalidate_inode_pages2_range(mapping,
658 pg_start, pg_end);
659 /*
660 * If a page can not be invalidated, fall back
661 * to buffered write.
662 */
663 if (retval) {
664 if (retval == -EBUSY)
665 goto buff_write;
666 goto err_out;
667 }
668 }
669 retval = v9fs_file_write(filp, data, count, offsetp);
670err_out:
671 mutex_unlock(&inode->i_mutex);
672 return retval;
673
674buff_write:
675 mutex_unlock(&inode->i_mutex);
676 return do_sync_write(filp, data, count, offsetp);
677}
678
679/**
680 * v9fs_cached_file_write - write to a file
681 * @filp: file pointer to write
682 * @data: data buffer to write data from
683 * @count: size of buffer
684 * @offset: offset at which to write data
685 *
686 */
687static ssize_t
688v9fs_cached_file_write(struct file *filp, const char __user * data,
689 size_t count, loff_t *offset)
690{
691
692 if (filp->f_flags & O_DIRECT)
693 return v9fs_direct_write(filp, data, count, offset);
694 return do_sync_write(filp, data, count, offset);
695}
696
697static const struct vm_operations_struct v9fs_file_vm_ops = {
698 .fault = filemap_fault,
699 .page_mkwrite = v9fs_vm_page_mkwrite,
700};
701
702
703const struct file_operations v9fs_cached_file_operations = {
509 .llseek = generic_file_llseek, 704 .llseek = generic_file_llseek,
510 .read = do_sync_read, 705 .read = v9fs_cached_file_read,
706 .write = v9fs_cached_file_write,
511 .aio_read = generic_file_aio_read, 707 .aio_read = generic_file_aio_read,
512 .write = v9fs_file_write, 708 .aio_write = generic_file_aio_write,
513 .open = v9fs_file_open, 709 .open = v9fs_file_open,
514 .release = v9fs_dir_release, 710 .release = v9fs_dir_release,
515 .lock = v9fs_file_lock, 711 .lock = v9fs_file_lock,
516 .mmap = generic_file_readonly_mmap, 712 .mmap = v9fs_file_mmap,
517 .fsync = v9fs_file_fsync, 713 .fsync = v9fs_file_fsync,
518}; 714};
519 715
520static const struct file_operations v9fs_cached_file_operations_dotl = { 716const struct file_operations v9fs_cached_file_operations_dotl = {
521 .llseek = generic_file_llseek, 717 .llseek = generic_file_llseek,
522 .read = do_sync_read, 718 .read = v9fs_cached_file_read,
719 .write = v9fs_cached_file_write,
523 .aio_read = generic_file_aio_read, 720 .aio_read = generic_file_aio_read,
524 .write = v9fs_file_write, 721 .aio_write = generic_file_aio_write,
525 .open = v9fs_file_open, 722 .open = v9fs_file_open,
526 .release = v9fs_dir_release, 723 .release = v9fs_dir_release,
527 .lock = v9fs_file_lock_dotl, 724 .lock = v9fs_file_lock_dotl,
528 .flock = v9fs_file_flock_dotl, 725 .flock = v9fs_file_flock_dotl,
529 .mmap = generic_file_readonly_mmap, 726 .mmap = v9fs_file_mmap,
530 .fsync = v9fs_file_fsync_dotl, 727 .fsync = v9fs_file_fsync_dotl,
531}; 728};
532 729
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index b76a40bdf4c2..7f6c67703195 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -203,26 +203,26 @@ v9fs_blank_wstat(struct p9_wstat *wstat)
203 wstat->extension = NULL; 203 wstat->extension = NULL;
204} 204}
205 205
206#ifdef CONFIG_9P_FSCACHE
207/** 206/**
208 * v9fs_alloc_inode - helper function to allocate an inode 207 * v9fs_alloc_inode - helper function to allocate an inode
209 * This callback is executed before setting up the inode so that we
210 * can associate a vcookie with each inode.
211 * 208 *
212 */ 209 */
213
214struct inode *v9fs_alloc_inode(struct super_block *sb) 210struct inode *v9fs_alloc_inode(struct super_block *sb)
215{ 211{
216 struct v9fs_cookie *vcookie; 212 struct v9fs_inode *v9inode;
217 vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache, 213 v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache,
218 GFP_KERNEL); 214 GFP_KERNEL);
219 if (!vcookie) 215 if (!v9inode)
220 return NULL; 216 return NULL;
221 217#ifdef CONFIG_9P_FSCACHE
222 vcookie->fscache = NULL; 218 v9inode->fscache = NULL;
223 vcookie->qid = NULL; 219 v9inode->fscache_key = NULL;
224 spin_lock_init(&vcookie->lock); 220 spin_lock_init(&v9inode->fscache_lock);
225 return &vcookie->inode; 221#endif
222 v9inode->writeback_fid = NULL;
223 v9inode->cache_validity = 0;
224 mutex_init(&v9inode->v_mutex);
225 return &v9inode->vfs_inode;
226} 226}
227 227
228/** 228/**
@@ -234,35 +234,18 @@ static void v9fs_i_callback(struct rcu_head *head)
234{ 234{
235 struct inode *inode = container_of(head, struct inode, i_rcu); 235 struct inode *inode = container_of(head, struct inode, i_rcu);
236 INIT_LIST_HEAD(&inode->i_dentry); 236 INIT_LIST_HEAD(&inode->i_dentry);
237 kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); 237 kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
238} 238}
239 239
240void v9fs_destroy_inode(struct inode *inode) 240void v9fs_destroy_inode(struct inode *inode)
241{ 241{
242 call_rcu(&inode->i_rcu, v9fs_i_callback); 242 call_rcu(&inode->i_rcu, v9fs_i_callback);
243} 243}
244#endif
245
246/**
247 * v9fs_get_inode - helper function to setup an inode
248 * @sb: superblock
249 * @mode: mode to setup inode with
250 *
251 */
252 244
253struct inode *v9fs_get_inode(struct super_block *sb, int mode) 245int v9fs_init_inode(struct v9fs_session_info *v9ses,
246 struct inode *inode, int mode)
254{ 247{
255 int err; 248 int err = 0;
256 struct inode *inode;
257 struct v9fs_session_info *v9ses = sb->s_fs_info;
258
259 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
260
261 inode = new_inode(sb);
262 if (!inode) {
263 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
264 return ERR_PTR(-ENOMEM);
265 }
266 249
267 inode_init_owner(inode, NULL, mode); 250 inode_init_owner(inode, NULL, mode);
268 inode->i_blocks = 0; 251 inode->i_blocks = 0;
@@ -292,14 +275,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
292 case S_IFREG: 275 case S_IFREG:
293 if (v9fs_proto_dotl(v9ses)) { 276 if (v9fs_proto_dotl(v9ses)) {
294 inode->i_op = &v9fs_file_inode_operations_dotl; 277 inode->i_op = &v9fs_file_inode_operations_dotl;
295 inode->i_fop = &v9fs_file_operations_dotl; 278 if (v9ses->cache)
279 inode->i_fop =
280 &v9fs_cached_file_operations_dotl;
281 else
282 inode->i_fop = &v9fs_file_operations_dotl;
296 } else { 283 } else {
297 inode->i_op = &v9fs_file_inode_operations; 284 inode->i_op = &v9fs_file_inode_operations;
298 inode->i_fop = &v9fs_file_operations; 285 if (v9ses->cache)
286 inode->i_fop = &v9fs_cached_file_operations;
287 else
288 inode->i_fop = &v9fs_file_operations;
299 } 289 }
300 290
301 break; 291 break;
302
303 case S_IFLNK: 292 case S_IFLNK:
304 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { 293 if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) {
305 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " 294 P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with "
@@ -335,12 +324,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode)
335 err = -EINVAL; 324 err = -EINVAL;
336 goto error; 325 goto error;
337 } 326 }
327error:
328 return err;
338 329
339 return inode; 330}
340 331
341error: 332/**
342 iput(inode); 333 * v9fs_get_inode - helper function to setup an inode
343 return ERR_PTR(err); 334 * @sb: superblock
335 * @mode: mode to setup inode with
336 *
337 */
338
339struct inode *v9fs_get_inode(struct super_block *sb, int mode)
340{
341 int err;
342 struct inode *inode;
343 struct v9fs_session_info *v9ses = sb->s_fs_info;
344
345 P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode);
346
347 inode = new_inode(sb);
348 if (!inode) {
349 P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n");
350 return ERR_PTR(-ENOMEM);
351 }
352 err = v9fs_init_inode(v9ses, inode, mode);
353 if (err) {
354 iput(inode);
355 return ERR_PTR(err);
356 }
357 return inode;
344} 358}
345 359
346/* 360/*
@@ -403,6 +417,8 @@ error:
403 */ 417 */
404void v9fs_evict_inode(struct inode *inode) 418void v9fs_evict_inode(struct inode *inode)
405{ 419{
420 struct v9fs_inode *v9inode = V9FS_I(inode);
421
406 truncate_inode_pages(inode->i_mapping, 0); 422 truncate_inode_pages(inode->i_mapping, 0);
407 end_writeback(inode); 423 end_writeback(inode);
408 filemap_fdatawrite(inode->i_mapping); 424 filemap_fdatawrite(inode->i_mapping);
@@ -410,41 +426,67 @@ void v9fs_evict_inode(struct inode *inode)
410#ifdef CONFIG_9P_FSCACHE 426#ifdef CONFIG_9P_FSCACHE
411 v9fs_cache_inode_put_cookie(inode); 427 v9fs_cache_inode_put_cookie(inode);
412#endif 428#endif
429 /* clunk the fid stashed in writeback_fid */
430 if (v9inode->writeback_fid) {
431 p9_client_clunk(v9inode->writeback_fid);
432 v9inode->writeback_fid = NULL;
433 }
413} 434}
414 435
415struct inode * 436static struct inode *v9fs_qid_iget(struct super_block *sb,
416v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, 437 struct p9_qid *qid,
417 struct super_block *sb) 438 struct p9_wstat *st)
418{ 439{
419 int err, umode; 440 int retval, umode;
420 struct inode *ret = NULL; 441 unsigned long i_ino;
421 struct p9_wstat *st; 442 struct inode *inode;
422 443 struct v9fs_session_info *v9ses = sb->s_fs_info;
423 st = p9_client_stat(fid);
424 if (IS_ERR(st))
425 return ERR_CAST(st);
426 444
445 i_ino = v9fs_qid2ino(qid);
446 inode = iget_locked(sb, i_ino);
447 if (!inode)
448 return ERR_PTR(-ENOMEM);
449 if (!(inode->i_state & I_NEW))
450 return inode;
451 /*
452 * initialize the inode with the stat info
453 * FIXME!! we may need support for stale inodes
454 * later.
455 */
427 umode = p9mode2unixmode(v9ses, st->mode); 456 umode = p9mode2unixmode(v9ses, st->mode);
428 ret = v9fs_get_inode(sb, umode); 457 retval = v9fs_init_inode(v9ses, inode, umode);
429 if (IS_ERR(ret)) { 458 if (retval)
430 err = PTR_ERR(ret);
431 goto error; 459 goto error;
432 }
433
434 v9fs_stat2inode(st, ret, sb);
435 ret->i_ino = v9fs_qid2ino(&st->qid);
436 460
461 v9fs_stat2inode(st, inode, sb);
437#ifdef CONFIG_9P_FSCACHE 462#ifdef CONFIG_9P_FSCACHE
438 v9fs_vcookie_set_qid(ret, &st->qid); 463 v9fs_fscache_set_key(inode, &st->qid);
439 v9fs_cache_inode_get_cookie(ret); 464 v9fs_cache_inode_get_cookie(inode);
440#endif 465#endif
441 p9stat_free(st); 466 unlock_new_inode(inode);
442 kfree(st); 467 return inode;
443 return ret;
444error: 468error:
469 unlock_new_inode(inode);
470 iput(inode);
471 return ERR_PTR(retval);
472
473}
474
475struct inode *
476v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
477 struct super_block *sb)
478{
479 struct p9_wstat *st;
480 struct inode *inode = NULL;
481
482 st = p9_client_stat(fid);
483 if (IS_ERR(st))
484 return ERR_CAST(st);
485
486 inode = v9fs_qid_iget(sb, &st->qid, st);
445 p9stat_free(st); 487 p9stat_free(st);
446 kfree(st); 488 kfree(st);
447 return ERR_PTR(err); 489 return inode;
448} 490}
449 491
450/** 492/**
@@ -458,8 +500,8 @@ error:
458static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) 500static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
459{ 501{
460 int retval; 502 int retval;
461 struct inode *file_inode;
462 struct p9_fid *v9fid; 503 struct p9_fid *v9fid;
504 struct inode *file_inode;
463 505
464 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, 506 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
465 rmdir); 507 rmdir);
@@ -470,8 +512,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
470 return PTR_ERR(v9fid); 512 return PTR_ERR(v9fid);
471 513
472 retval = p9_client_remove(v9fid); 514 retval = p9_client_remove(v9fid);
473 if (!retval) 515 if (!retval) {
474 drop_nlink(file_inode); 516 /*
517 * directories on unlink should have zero
518 * link count
519 */
520 if (rmdir) {
521 clear_nlink(file_inode);
522 drop_nlink(dir);
523 } else
524 drop_nlink(file_inode);
525
526 v9fs_invalidate_inode_attr(file_inode);
527 v9fs_invalidate_inode_attr(dir);
528 }
475 return retval; 529 return retval;
476} 530}
477 531
@@ -531,7 +585,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
531 } 585 }
532 586
533 /* instantiate inode and assign the unopened fid to the dentry */ 587 /* instantiate inode and assign the unopened fid to the dentry */
534 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 588 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
535 if (IS_ERR(inode)) { 589 if (IS_ERR(inode)) {
536 err = PTR_ERR(inode); 590 err = PTR_ERR(inode);
537 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 591 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -570,9 +624,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
570 int err; 624 int err;
571 u32 perm; 625 u32 perm;
572 int flags; 626 int flags;
573 struct v9fs_session_info *v9ses;
574 struct p9_fid *fid;
575 struct file *filp; 627 struct file *filp;
628 struct v9fs_inode *v9inode;
629 struct v9fs_session_info *v9ses;
630 struct p9_fid *fid, *inode_fid;
576 631
577 err = 0; 632 err = 0;
578 fid = NULL; 633 fid = NULL;
@@ -592,8 +647,29 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
592 goto error; 647 goto error;
593 } 648 }
594 649
650 v9fs_invalidate_inode_attr(dir);
595 /* if we are opening a file, assign the open fid to the file */ 651 /* if we are opening a file, assign the open fid to the file */
596 if (nd && nd->flags & LOOKUP_OPEN) { 652 if (nd && nd->flags & LOOKUP_OPEN) {
653 v9inode = V9FS_I(dentry->d_inode);
654 mutex_lock(&v9inode->v_mutex);
655 if (v9ses->cache && !v9inode->writeback_fid &&
656 ((flags & O_ACCMODE) != O_RDONLY)) {
657 /*
658 * clone a fid and add it to writeback_fid
659 * we do it during open time instead of
660 * page dirty time via write_begin/page_mkwrite
661 * because we want write after unlink usecase
662 * to work.
663 */
664 inode_fid = v9fs_writeback_fid(dentry);
665 if (IS_ERR(inode_fid)) {
666 err = PTR_ERR(inode_fid);
667 mutex_unlock(&v9inode->v_mutex);
668 goto error;
669 }
670 v9inode->writeback_fid = (void *) inode_fid;
671 }
672 mutex_unlock(&v9inode->v_mutex);
597 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 673 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
598 if (IS_ERR(filp)) { 674 if (IS_ERR(filp)) {
599 err = PTR_ERR(filp); 675 err = PTR_ERR(filp);
@@ -601,6 +677,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
601 } 677 }
602 678
603 filp->private_data = fid; 679 filp->private_data = fid;
680#ifdef CONFIG_9P_FSCACHE
681 if (v9ses->cache)
682 v9fs_cache_inode_set_cookie(dentry->d_inode, filp);
683#endif
604 } else 684 } else
605 p9_client_clunk(fid); 685 p9_client_clunk(fid);
606 686
@@ -625,8 +705,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
625{ 705{
626 int err; 706 int err;
627 u32 perm; 707 u32 perm;
628 struct v9fs_session_info *v9ses;
629 struct p9_fid *fid; 708 struct p9_fid *fid;
709 struct v9fs_session_info *v9ses;
630 710
631 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); 711 P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
632 err = 0; 712 err = 0;
@@ -636,6 +716,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
636 if (IS_ERR(fid)) { 716 if (IS_ERR(fid)) {
637 err = PTR_ERR(fid); 717 err = PTR_ERR(fid);
638 fid = NULL; 718 fid = NULL;
719 } else {
720 inc_nlink(dir);
721 v9fs_invalidate_inode_attr(dir);
639 } 722 }
640 723
641 if (fid) 724 if (fid)
@@ -687,7 +770,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
687 return ERR_PTR(result); 770 return ERR_PTR(result);
688 } 771 }
689 772
690 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 773 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
691 if (IS_ERR(inode)) { 774 if (IS_ERR(inode)) {
692 result = PTR_ERR(inode); 775 result = PTR_ERR(inode);
693 inode = NULL; 776 inode = NULL;
@@ -747,17 +830,19 @@ int
747v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, 830v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
748 struct inode *new_dir, struct dentry *new_dentry) 831 struct inode *new_dir, struct dentry *new_dentry)
749{ 832{
833 int retval;
750 struct inode *old_inode; 834 struct inode *old_inode;
835 struct inode *new_inode;
751 struct v9fs_session_info *v9ses; 836 struct v9fs_session_info *v9ses;
752 struct p9_fid *oldfid; 837 struct p9_fid *oldfid;
753 struct p9_fid *olddirfid; 838 struct p9_fid *olddirfid;
754 struct p9_fid *newdirfid; 839 struct p9_fid *newdirfid;
755 struct p9_wstat wstat; 840 struct p9_wstat wstat;
756 int retval;
757 841
758 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 842 P9_DPRINTK(P9_DEBUG_VFS, "\n");
759 retval = 0; 843 retval = 0;
760 old_inode = old_dentry->d_inode; 844 old_inode = old_dentry->d_inode;
845 new_inode = new_dentry->d_inode;
761 v9ses = v9fs_inode2v9ses(old_inode); 846 v9ses = v9fs_inode2v9ses(old_inode);
762 oldfid = v9fs_fid_lookup(old_dentry); 847 oldfid = v9fs_fid_lookup(old_dentry);
763 if (IS_ERR(oldfid)) 848 if (IS_ERR(oldfid))
@@ -798,9 +883,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
798 retval = p9_client_wstat(oldfid, &wstat); 883 retval = p9_client_wstat(oldfid, &wstat);
799 884
800clunk_newdir: 885clunk_newdir:
801 if (!retval) 886 if (!retval) {
887 if (new_inode) {
888 if (S_ISDIR(new_inode->i_mode))
889 clear_nlink(new_inode);
890 else
891 drop_nlink(new_inode);
892 /*
893 * Work around vfs rename rehash bug with
894 * FS_RENAME_DOES_D_MOVE
895 */
896 v9fs_invalidate_inode_attr(new_inode);
897 }
898 if (S_ISDIR(old_inode->i_mode)) {
899 if (!new_inode)
900 inc_nlink(new_dir);
901 drop_nlink(old_dir);
902 }
903 v9fs_invalidate_inode_attr(old_inode);
904 v9fs_invalidate_inode_attr(old_dir);
905 v9fs_invalidate_inode_attr(new_dir);
906
802 /* successful rename */ 907 /* successful rename */
803 d_move(old_dentry, new_dentry); 908 d_move(old_dentry, new_dentry);
909 }
804 up_write(&v9ses->rename_sem); 910 up_write(&v9ses->rename_sem);
805 p9_client_clunk(newdirfid); 911 p9_client_clunk(newdirfid);
806 912
@@ -830,10 +936,11 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
830 936
831 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 937 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
832 err = -EPERM; 938 err = -EPERM;
833 v9ses = v9fs_inode2v9ses(dentry->d_inode); 939 v9ses = v9fs_dentry2v9ses(dentry);
834 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 940 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
835 return simple_getattr(mnt, dentry, stat); 941 generic_fillattr(dentry->d_inode, stat);
836 942 return 0;
943 }
837 fid = v9fs_fid_lookup(dentry); 944 fid = v9fs_fid_lookup(dentry);
838 if (IS_ERR(fid)) 945 if (IS_ERR(fid))
839 return PTR_ERR(fid); 946 return PTR_ERR(fid);
@@ -865,8 +972,12 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
865 struct p9_wstat wstat; 972 struct p9_wstat wstat;
866 973
867 P9_DPRINTK(P9_DEBUG_VFS, "\n"); 974 P9_DPRINTK(P9_DEBUG_VFS, "\n");
975 retval = inode_change_ok(dentry->d_inode, iattr);
976 if (retval)
977 return retval;
978
868 retval = -EPERM; 979 retval = -EPERM;
869 v9ses = v9fs_inode2v9ses(dentry->d_inode); 980 v9ses = v9fs_dentry2v9ses(dentry);
870 fid = v9fs_fid_lookup(dentry); 981 fid = v9fs_fid_lookup(dentry);
871 if(IS_ERR(fid)) 982 if(IS_ERR(fid))
872 return PTR_ERR(fid); 983 return PTR_ERR(fid);
@@ -892,16 +1003,19 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
892 wstat.n_gid = iattr->ia_gid; 1003 wstat.n_gid = iattr->ia_gid;
893 } 1004 }
894 1005
1006 /* Write all dirty data */
1007 if (S_ISREG(dentry->d_inode->i_mode))
1008 filemap_write_and_wait(dentry->d_inode->i_mapping);
1009
895 retval = p9_client_wstat(fid, &wstat); 1010 retval = p9_client_wstat(fid, &wstat);
896 if (retval < 0) 1011 if (retval < 0)
897 return retval; 1012 return retval;
898 1013
899 if ((iattr->ia_valid & ATTR_SIZE) && 1014 if ((iattr->ia_valid & ATTR_SIZE) &&
900 iattr->ia_size != i_size_read(dentry->d_inode)) { 1015 iattr->ia_size != i_size_read(dentry->d_inode))
901 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 1016 truncate_setsize(dentry->d_inode, iattr->ia_size);
902 if (retval) 1017
903 return retval; 1018 v9fs_invalidate_inode_attr(dentry->d_inode);
904 }
905 1019
906 setattr_copy(dentry->d_inode, iattr); 1020 setattr_copy(dentry->d_inode, iattr);
907 mark_inode_dirty(dentry->d_inode); 1021 mark_inode_dirty(dentry->d_inode);
@@ -924,6 +1038,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
924 char tag_name[14]; 1038 char tag_name[14];
925 unsigned int i_nlink; 1039 unsigned int i_nlink;
926 struct v9fs_session_info *v9ses = sb->s_fs_info; 1040 struct v9fs_session_info *v9ses = sb->s_fs_info;
1041 struct v9fs_inode *v9inode = V9FS_I(inode);
927 1042
928 inode->i_nlink = 1; 1043 inode->i_nlink = 1;
929 1044
@@ -983,6 +1098,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
983 1098
984 /* not real number of blocks, but 512 byte ones ... */ 1099 /* not real number of blocks, but 512 byte ones ... */
985 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; 1100 inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9;
1101 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
986} 1102}
987 1103
988/** 1104/**
@@ -1023,7 +1139,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
1023 1139
1024 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name); 1140 P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
1025 retval = -EPERM; 1141 retval = -EPERM;
1026 v9ses = v9fs_inode2v9ses(dentry->d_inode); 1142 v9ses = v9fs_dentry2v9ses(dentry);
1027 fid = v9fs_fid_lookup(dentry); 1143 fid = v9fs_fid_lookup(dentry);
1028 if (IS_ERR(fid)) 1144 if (IS_ERR(fid))
1029 return PTR_ERR(fid); 1145 return PTR_ERR(fid);
@@ -1115,8 +1231,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1115 int mode, const char *extension) 1231 int mode, const char *extension)
1116{ 1232{
1117 u32 perm; 1233 u32 perm;
1118 struct v9fs_session_info *v9ses;
1119 struct p9_fid *fid; 1234 struct p9_fid *fid;
1235 struct v9fs_session_info *v9ses;
1120 1236
1121 v9ses = v9fs_inode2v9ses(dir); 1237 v9ses = v9fs_inode2v9ses(dir);
1122 if (!v9fs_proto_dotu(v9ses)) { 1238 if (!v9fs_proto_dotu(v9ses)) {
@@ -1130,6 +1246,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
1130 if (IS_ERR(fid)) 1246 if (IS_ERR(fid))
1131 return PTR_ERR(fid); 1247 return PTR_ERR(fid);
1132 1248
1249 v9fs_invalidate_inode_attr(dir);
1133 p9_client_clunk(fid); 1250 p9_client_clunk(fid);
1134 return 0; 1251 return 0;
1135} 1252}
@@ -1166,8 +1283,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1166 struct dentry *dentry) 1283 struct dentry *dentry)
1167{ 1284{
1168 int retval; 1285 int retval;
1169 struct p9_fid *oldfid;
1170 char *name; 1286 char *name;
1287 struct p9_fid *oldfid;
1171 1288
1172 P9_DPRINTK(P9_DEBUG_VFS, 1289 P9_DPRINTK(P9_DEBUG_VFS,
1173 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, 1290 " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
@@ -1186,7 +1303,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
1186 sprintf(name, "%d\n", oldfid->fid); 1303 sprintf(name, "%d\n", oldfid->fid);
1187 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); 1304 retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
1188 __putname(name); 1305 __putname(name);
1189 1306 if (!retval) {
1307 v9fs_refresh_inode(oldfid, old_dentry->d_inode);
1308 v9fs_invalidate_inode_attr(dir);
1309 }
1190clunk_fid: 1310clunk_fid:
1191 p9_client_clunk(oldfid); 1311 p9_client_clunk(oldfid);
1192 return retval; 1312 return retval;
@@ -1237,6 +1357,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
1237 return retval; 1357 return retval;
1238} 1358}
1239 1359
1360int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
1361{
1362 loff_t i_size;
1363 struct p9_wstat *st;
1364 struct v9fs_session_info *v9ses;
1365
1366 v9ses = v9fs_inode2v9ses(inode);
1367 st = p9_client_stat(fid);
1368 if (IS_ERR(st))
1369 return PTR_ERR(st);
1370
1371 spin_lock(&inode->i_lock);
1372 /*
1373 * We don't want to refresh inode->i_size,
1374 * because we may have cached data
1375 */
1376 i_size = inode->i_size;
1377 v9fs_stat2inode(st, inode, inode->i_sb);
1378 if (v9ses->cache)
1379 inode->i_size = i_size;
1380 spin_unlock(&inode->i_lock);
1381 p9stat_free(st);
1382 kfree(st);
1383 return 0;
1384}
1385
1240static const struct inode_operations v9fs_dir_inode_operations_dotu = { 1386static const struct inode_operations v9fs_dir_inode_operations_dotu = {
1241 .create = v9fs_vfs_create, 1387 .create = v9fs_vfs_create,
1242 .lookup = v9fs_vfs_lookup, 1388 .lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index fe3ffa9aace4..ffbb113d5f33 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
86 return dentry; 86 return dentry;
87} 87}
88 88
89static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
90 struct p9_qid *qid,
91 struct p9_fid *fid,
92 struct p9_stat_dotl *st)
93{
94 int retval;
95 unsigned long i_ino;
96 struct inode *inode;
97 struct v9fs_session_info *v9ses = sb->s_fs_info;
98
99 i_ino = v9fs_qid2ino(qid);
100 inode = iget_locked(sb, i_ino);
101 if (!inode)
102 return ERR_PTR(-ENOMEM);
103 if (!(inode->i_state & I_NEW))
104 return inode;
105 /*
106 * initialize the inode with the stat info
107 * FIXME!! we may need support for stale inodes
108 * later.
109 */
110 retval = v9fs_init_inode(v9ses, inode, st->st_mode);
111 if (retval)
112 goto error;
113
114 v9fs_stat2inode_dotl(st, inode);
115#ifdef CONFIG_9P_FSCACHE
116 v9fs_fscache_set_key(inode, &st->qid);
117 v9fs_cache_inode_get_cookie(inode);
118#endif
119 retval = v9fs_get_acl(inode, fid);
120 if (retval)
121 goto error;
122
123 unlock_new_inode(inode);
124 return inode;
125error:
126 unlock_new_inode(inode);
127 iput(inode);
128 return ERR_PTR(retval);
129
130}
131
89struct inode * 132struct inode *
90v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, 133v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
91 struct super_block *sb) 134 struct super_block *sb)
92{ 135{
93 struct inode *ret = NULL;
94 int err;
95 struct p9_stat_dotl *st; 136 struct p9_stat_dotl *st;
137 struct inode *inode = NULL;
96 138
97 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 139 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
98 if (IS_ERR(st)) 140 if (IS_ERR(st))
99 return ERR_CAST(st); 141 return ERR_CAST(st);
100 142
101 ret = v9fs_get_inode(sb, st->st_mode); 143 inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st);
102 if (IS_ERR(ret)) {
103 err = PTR_ERR(ret);
104 goto error;
105 }
106
107 v9fs_stat2inode_dotl(st, ret);
108 ret->i_ino = v9fs_qid2ino(&st->qid);
109#ifdef CONFIG_9P_FSCACHE
110 v9fs_vcookie_set_qid(ret, &st->qid);
111 v9fs_cache_inode_get_cookie(ret);
112#endif
113 err = v9fs_get_acl(ret, fid);
114 if (err) {
115 iput(ret);
116 goto error;
117 }
118 kfree(st); 144 kfree(st);
119 return ret; 145 return inode;
120error:
121 kfree(st);
122 return ERR_PTR(err);
123} 146}
124 147
125/** 148/**
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
136 struct nameidata *nd) 159 struct nameidata *nd)
137{ 160{
138 int err = 0; 161 int err = 0;
139 char *name = NULL;
140 gid_t gid; 162 gid_t gid;
141 int flags; 163 int flags;
142 mode_t mode; 164 mode_t mode;
143 struct v9fs_session_info *v9ses; 165 char *name = NULL;
144 struct p9_fid *fid = NULL;
145 struct p9_fid *dfid, *ofid;
146 struct file *filp; 166 struct file *filp;
147 struct p9_qid qid; 167 struct p9_qid qid;
148 struct inode *inode; 168 struct inode *inode;
169 struct p9_fid *fid = NULL;
170 struct v9fs_inode *v9inode;
171 struct p9_fid *dfid, *ofid, *inode_fid;
172 struct v9fs_session_info *v9ses;
149 struct posix_acl *pacl = NULL, *dacl = NULL; 173 struct posix_acl *pacl = NULL, *dacl = NULL;
150 174
151 v9ses = v9fs_inode2v9ses(dir); 175 v9ses = v9fs_inode2v9ses(dir);
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
196 err); 220 err);
197 goto error; 221 goto error;
198 } 222 }
223 v9fs_invalidate_inode_attr(dir);
199 224
200 /* instantiate inode and assign the unopened fid to the dentry */ 225 /* instantiate inode and assign the unopened fid to the dentry */
201 fid = p9_client_walk(dfid, 1, &name, 1); 226 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
205 fid = NULL; 230 fid = NULL;
206 goto error; 231 goto error;
207 } 232 }
208 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 233 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
209 if (IS_ERR(inode)) { 234 if (IS_ERR(inode)) {
210 err = PTR_ERR(inode); 235 err = PTR_ERR(inode);
211 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 236 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -219,6 +244,26 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
219 /* Now set the ACL based on the default value */ 244 /* Now set the ACL based on the default value */
220 v9fs_set_create_acl(dentry, dacl, pacl); 245 v9fs_set_create_acl(dentry, dacl, pacl);
221 246
247 v9inode = V9FS_I(inode);
248 mutex_lock(&v9inode->v_mutex);
249 if (v9ses->cache && !v9inode->writeback_fid &&
250 ((flags & O_ACCMODE) != O_RDONLY)) {
251 /*
252 * clone a fid and add it to writeback_fid
253 * we do it during open time instead of
254 * page dirty time via write_begin/page_mkwrite
255 * because we want write after unlink usecase
256 * to work.
257 */
258 inode_fid = v9fs_writeback_fid(dentry);
259 if (IS_ERR(inode_fid)) {
260 err = PTR_ERR(inode_fid);
261 mutex_unlock(&v9inode->v_mutex);
262 goto error;
263 }
264 v9inode->writeback_fid = (void *) inode_fid;
265 }
266 mutex_unlock(&v9inode->v_mutex);
222 /* Since we are opening a file, assign the open fid to the file */ 267 /* Since we are opening a file, assign the open fid to the file */
223 filp = lookup_instantiate_filp(nd, dentry, generic_file_open); 268 filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
224 if (IS_ERR(filp)) { 269 if (IS_ERR(filp)) {
@@ -226,6 +271,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
226 return PTR_ERR(filp); 271 return PTR_ERR(filp);
227 } 272 }
228 filp->private_data = ofid; 273 filp->private_data = ofid;
274#ifdef CONFIG_9P_FSCACHE
275 if (v9ses->cache)
276 v9fs_cache_inode_set_cookie(inode, filp);
277#endif
229 return 0; 278 return 0;
230 279
231error: 280error:
@@ -300,7 +349,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
300 goto error; 349 goto error;
301 } 350 }
302 351
303 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 352 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
304 if (IS_ERR(inode)) { 353 if (IS_ERR(inode)) {
305 err = PTR_ERR(inode); 354 err = PTR_ERR(inode);
306 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 355 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -327,7 +376,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
327 } 376 }
328 /* Now set the ACL based on the default value */ 377 /* Now set the ACL based on the default value */
329 v9fs_set_create_acl(dentry, dacl, pacl); 378 v9fs_set_create_acl(dentry, dacl, pacl);
330 379 inc_nlink(dir);
380 v9fs_invalidate_inode_attr(dir);
331error: 381error:
332 if (fid) 382 if (fid)
333 p9_client_clunk(fid); 383 p9_client_clunk(fid);
@@ -345,10 +395,11 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
345 395
346 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); 396 P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
347 err = -EPERM; 397 err = -EPERM;
348 v9ses = v9fs_inode2v9ses(dentry->d_inode); 398 v9ses = v9fs_dentry2v9ses(dentry);
349 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) 399 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
350 return simple_getattr(mnt, dentry, stat); 400 generic_fillattr(dentry->d_inode, stat);
351 401 return 0;
402 }
352 fid = v9fs_fid_lookup(dentry); 403 fid = v9fs_fid_lookup(dentry);
353 if (IS_ERR(fid)) 404 if (IS_ERR(fid))
354 return PTR_ERR(fid); 405 return PTR_ERR(fid);
@@ -401,22 +452,24 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
401 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; 452 p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec;
402 453
403 retval = -EPERM; 454 retval = -EPERM;
404 v9ses = v9fs_inode2v9ses(dentry->d_inode); 455 v9ses = v9fs_dentry2v9ses(dentry);
405 fid = v9fs_fid_lookup(dentry); 456 fid = v9fs_fid_lookup(dentry);
406 if (IS_ERR(fid)) 457 if (IS_ERR(fid))
407 return PTR_ERR(fid); 458 return PTR_ERR(fid);
408 459
460 /* Write all dirty data */
461 if (S_ISREG(dentry->d_inode->i_mode))
462 filemap_write_and_wait(dentry->d_inode->i_mapping);
463
409 retval = p9_client_setattr(fid, &p9attr); 464 retval = p9_client_setattr(fid, &p9attr);
410 if (retval < 0) 465 if (retval < 0)
411 return retval; 466 return retval;
412 467
413 if ((iattr->ia_valid & ATTR_SIZE) && 468 if ((iattr->ia_valid & ATTR_SIZE) &&
414 iattr->ia_size != i_size_read(dentry->d_inode)) { 469 iattr->ia_size != i_size_read(dentry->d_inode))
415 retval = vmtruncate(dentry->d_inode, iattr->ia_size); 470 truncate_setsize(dentry->d_inode, iattr->ia_size);
416 if (retval)
417 return retval;
418 }
419 471
472 v9fs_invalidate_inode_attr(dentry->d_inode);
420 setattr_copy(dentry->d_inode, iattr); 473 setattr_copy(dentry->d_inode, iattr);
421 mark_inode_dirty(dentry->d_inode); 474 mark_inode_dirty(dentry->d_inode);
422 if (iattr->ia_valid & ATTR_MODE) { 475 if (iattr->ia_valid & ATTR_MODE) {
@@ -439,6 +492,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
439void 492void
440v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) 493v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
441{ 494{
495 struct v9fs_inode *v9inode = V9FS_I(inode);
442 496
443 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { 497 if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) {
444 inode->i_atime.tv_sec = stat->st_atime_sec; 498 inode->i_atime.tv_sec = stat->st_atime_sec;
@@ -497,20 +551,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
497 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION 551 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
498 * because the inode structure does not have fields for them. 552 * because the inode structure does not have fields for them.
499 */ 553 */
554 v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR;
500} 555}
501 556
502static int 557static int
503v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, 558v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
504 const char *symname) 559 const char *symname)
505{ 560{
506 struct v9fs_session_info *v9ses;
507 struct p9_fid *dfid;
508 struct p9_fid *fid = NULL;
509 struct inode *inode;
510 struct p9_qid qid;
511 char *name;
512 int err; 561 int err;
513 gid_t gid; 562 gid_t gid;
563 char *name;
564 struct p9_qid qid;
565 struct inode *inode;
566 struct p9_fid *dfid;
567 struct p9_fid *fid = NULL;
568 struct v9fs_session_info *v9ses;
514 569
515 name = (char *) dentry->d_name.name; 570 name = (char *) dentry->d_name.name;
516 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", 571 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n",
@@ -534,6 +589,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
534 goto error; 589 goto error;
535 } 590 }
536 591
592 v9fs_invalidate_inode_attr(dir);
537 if (v9ses->cache) { 593 if (v9ses->cache) {
538 /* Now walk from the parent so we can get an unopened fid. */ 594 /* Now walk from the parent so we can get an unopened fid. */
539 fid = p9_client_walk(dfid, 1, &name, 1); 595 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -546,7 +602,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
546 } 602 }
547 603
548 /* instantiate inode and assign the unopened fid to dentry */ 604 /* instantiate inode and assign the unopened fid to dentry */
549 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 605 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
550 if (IS_ERR(inode)) { 606 if (IS_ERR(inode)) {
551 err = PTR_ERR(inode); 607 err = PTR_ERR(inode);
552 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 608 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -588,10 +644,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
588 struct dentry *dentry) 644 struct dentry *dentry)
589{ 645{
590 int err; 646 int err;
591 struct p9_fid *dfid, *oldfid;
592 char *name; 647 char *name;
593 struct v9fs_session_info *v9ses;
594 struct dentry *dir_dentry; 648 struct dentry *dir_dentry;
649 struct p9_fid *dfid, *oldfid;
650 struct v9fs_session_info *v9ses;
595 651
596 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", 652 P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n",
597 dir->i_ino, old_dentry->d_name.name, 653 dir->i_ino, old_dentry->d_name.name,
@@ -616,29 +672,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
616 return err; 672 return err;
617 } 673 }
618 674
675 v9fs_invalidate_inode_attr(dir);
619 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 676 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
620 /* Get the latest stat info from server. */ 677 /* Get the latest stat info from server. */
621 struct p9_fid *fid; 678 struct p9_fid *fid;
622 struct p9_stat_dotl *st;
623
624 fid = v9fs_fid_lookup(old_dentry); 679 fid = v9fs_fid_lookup(old_dentry);
625 if (IS_ERR(fid)) 680 if (IS_ERR(fid))
626 return PTR_ERR(fid); 681 return PTR_ERR(fid);
627 682
628 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 683 v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
629 if (IS_ERR(st))
630 return PTR_ERR(st);
631
632 v9fs_stat2inode_dotl(st, old_dentry->d_inode);
633
634 kfree(st);
635 } else {
636 /* Caching disabled. No need to get upto date stat info.
637 * This dentry will be released immediately. So, just hold the
638 * inode
639 */
640 ihold(old_dentry->d_inode);
641 } 684 }
685 ihold(old_dentry->d_inode);
642 d_instantiate(dentry, old_dentry->d_inode); 686 d_instantiate(dentry, old_dentry->d_inode);
643 687
644 return err; 688 return err;
@@ -657,12 +701,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
657 dev_t rdev) 701 dev_t rdev)
658{ 702{
659 int err; 703 int err;
704 gid_t gid;
660 char *name; 705 char *name;
661 mode_t mode; 706 mode_t mode;
662 struct v9fs_session_info *v9ses; 707 struct v9fs_session_info *v9ses;
663 struct p9_fid *fid = NULL, *dfid = NULL; 708 struct p9_fid *fid = NULL, *dfid = NULL;
664 struct inode *inode; 709 struct inode *inode;
665 gid_t gid;
666 struct p9_qid qid; 710 struct p9_qid qid;
667 struct dentry *dir_dentry; 711 struct dentry *dir_dentry;
668 struct posix_acl *dacl = NULL, *pacl = NULL; 712 struct posix_acl *dacl = NULL, *pacl = NULL;
@@ -699,6 +743,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
699 if (err < 0) 743 if (err < 0)
700 goto error; 744 goto error;
701 745
746 v9fs_invalidate_inode_attr(dir);
702 /* instantiate inode and assign the unopened fid to the dentry */ 747 /* instantiate inode and assign the unopened fid to the dentry */
703 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { 748 if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
704 fid = p9_client_walk(dfid, 1, &name, 1); 749 fid = p9_client_walk(dfid, 1, &name, 1);
@@ -710,7 +755,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
710 goto error; 755 goto error;
711 } 756 }
712 757
713 inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); 758 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb);
714 if (IS_ERR(inode)) { 759 if (IS_ERR(inode)) {
715 err = PTR_ERR(inode); 760 err = PTR_ERR(inode);
716 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 761 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -782,6 +827,31 @@ ndset:
782 return NULL; 827 return NULL;
783} 828}
784 829
830int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
831{
832 loff_t i_size;
833 struct p9_stat_dotl *st;
834 struct v9fs_session_info *v9ses;
835
836 v9ses = v9fs_inode2v9ses(inode);
837 st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
838 if (IS_ERR(st))
839 return PTR_ERR(st);
840
841 spin_lock(&inode->i_lock);
842 /*
843 * We don't want to refresh inode->i_size,
844 * because we may have cached data
845 */
846 i_size = inode->i_size;
847 v9fs_stat2inode_dotl(st, inode);
848 if (v9ses->cache)
849 inode->i_size = i_size;
850 spin_unlock(&inode->i_lock);
851 kfree(st);
852 return 0;
853}
854
785const struct inode_operations v9fs_dir_inode_operations_dotl = { 855const struct inode_operations v9fs_dir_inode_operations_dotl = {
786 .create = v9fs_vfs_create_dotl, 856 .create = v9fs_vfs_create_dotl,
787 .lookup = v9fs_vfs_lookup, 857 .lookup = v9fs_vfs_lookup,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index dbaabe3b8131..f3eed3383e4f 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
86 } else 86 } else
87 sb->s_op = &v9fs_super_ops; 87 sb->s_op = &v9fs_super_ops;
88 sb->s_bdi = &v9ses->bdi; 88 sb->s_bdi = &v9ses->bdi;
89 if (v9ses->cache)
90 sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
89 91
90 sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | 92 sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
91 MS_NOATIME; 93 if (!v9ses->cache)
94 sb->s_flags |= MS_SYNCHRONOUS;
92 95
93#ifdef CONFIG_9P_FS_POSIX_ACL 96#ifdef CONFIG_9P_FS_POSIX_ACL
94 if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT) 97 if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL)
95 sb->s_flags |= MS_POSIXACL; 98 sb->s_flags |= MS_POSIXACL;
96#endif 99#endif
97 100
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
151 retval = PTR_ERR(inode); 154 retval = PTR_ERR(inode);
152 goto release_sb; 155 goto release_sb;
153 } 156 }
154
155 root = d_alloc_root(inode); 157 root = d_alloc_root(inode);
156 if (!root) { 158 if (!root) {
157 iput(inode); 159 iput(inode);
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
166 retval = PTR_ERR(st); 168 retval = PTR_ERR(st);
167 goto release_sb; 169 goto release_sb;
168 } 170 }
169 171 root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
170 v9fs_stat2inode_dotl(st, root->d_inode); 172 v9fs_stat2inode_dotl(st, root->d_inode);
171 kfree(st); 173 kfree(st);
172 } else { 174 } else {
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
183 p9stat_free(st); 185 p9stat_free(st);
184 kfree(st); 186 kfree(st);
185 } 187 }
188 v9fs_fid_add(root, fid);
186 retval = v9fs_get_acl(inode, fid); 189 retval = v9fs_get_acl(inode, fid);
187 if (retval) 190 if (retval)
188 goto release_sb; 191 goto release_sb;
189 v9fs_fid_add(root, fid); 192 /*
193 * Add the root fid to session info. This is used
194 * for file system sync. We want a cloned fid here
195 * so that we can do a sync_filesystem after a
196 * shrink_dcache_for_umount
197 */
198 v9ses->root_fid = v9fs_fid_clone(root);
199 if (IS_ERR(v9ses->root_fid)) {
200 retval = PTR_ERR(v9ses->root_fid);
201 goto release_sb;
202 }
190 203
191 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); 204 P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
192 return dget(sb->s_root); 205 return dget(sb->s_root);
@@ -197,15 +210,11 @@ close_session:
197 v9fs_session_close(v9ses); 210 v9fs_session_close(v9ses);
198 kfree(v9ses); 211 kfree(v9ses);
199 return ERR_PTR(retval); 212 return ERR_PTR(retval);
200
201release_sb: 213release_sb:
202 /* 214 /*
203 * we will do the session_close and root dentry release 215 * we will do the session_close and root dentry
204 * in the below call. But we need to clunk fid, because we haven't 216 * release in the below call.
205 * attached the fid to dentry so it won't get clunked
206 * automatically.
207 */ 217 */
208 p9_client_clunk(fid);
209 deactivate_locked_super(sb); 218 deactivate_locked_super(sb);
210 return ERR_PTR(retval); 219 return ERR_PTR(retval);
211} 220}
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s)
223 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); 232 P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
224 233
225 kill_anon_super(s); 234 kill_anon_super(s);
226 235 p9_client_clunk(v9ses->root_fid);
227 v9fs_session_cancel(v9ses); 236 v9fs_session_cancel(v9ses);
228 v9fs_session_close(v9ses); 237 v9fs_session_close(v9ses);
229 kfree(v9ses); 238 kfree(v9ses);
@@ -253,7 +262,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
253 goto done; 262 goto done;
254 } 263 }
255 264
256 v9ses = v9fs_inode2v9ses(dentry->d_inode); 265 v9ses = v9fs_dentry2v9ses(dentry);
257 if (v9fs_proto_dotl(v9ses)) { 266 if (v9fs_proto_dotl(v9ses)) {
258 res = p9_client_statfs(fid, &rs); 267 res = p9_client_statfs(fid, &rs);
259 if (res == 0) { 268 if (res == 0) {
@@ -276,11 +285,31 @@ done:
276 return res; 285 return res;
277} 286}
278 287
288static int v9fs_sync_fs(struct super_block *sb, int wait)
289{
290 struct v9fs_session_info *v9ses = sb->s_fs_info;
291
292 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb);
293 return p9_client_sync_fs(v9ses->root_fid);
294}
295
296static int v9fs_drop_inode(struct inode *inode)
297{
298 struct v9fs_session_info *v9ses;
299 v9ses = v9fs_inode2v9ses(inode);
300 if (v9ses->cache)
301 return generic_drop_inode(inode);
302 /*
303 * in case of non cached mode always drop the
304 * the inode because we want the inode attribute
305 * to always match that on the server.
306 */
307 return 1;
308}
309
279static const struct super_operations v9fs_super_ops = { 310static const struct super_operations v9fs_super_ops = {
280#ifdef CONFIG_9P_FSCACHE
281 .alloc_inode = v9fs_alloc_inode, 311 .alloc_inode = v9fs_alloc_inode,
282 .destroy_inode = v9fs_destroy_inode, 312 .destroy_inode = v9fs_destroy_inode,
283#endif
284 .statfs = simple_statfs, 313 .statfs = simple_statfs,
285 .evict_inode = v9fs_evict_inode, 314 .evict_inode = v9fs_evict_inode,
286 .show_options = generic_show_options, 315 .show_options = generic_show_options,
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = {
288}; 317};
289 318
290static const struct super_operations v9fs_super_ops_dotl = { 319static const struct super_operations v9fs_super_ops_dotl = {
291#ifdef CONFIG_9P_FSCACHE
292 .alloc_inode = v9fs_alloc_inode, 320 .alloc_inode = v9fs_alloc_inode,
293 .destroy_inode = v9fs_destroy_inode, 321 .destroy_inode = v9fs_destroy_inode,
294#endif 322 .sync_fs = v9fs_sync_fs,
295 .statfs = v9fs_statfs, 323 .statfs = v9fs_statfs,
324 .drop_inode = v9fs_drop_inode,
296 .evict_inode = v9fs_evict_inode, 325 .evict_inode = v9fs_evict_inode,
297 .show_options = generic_show_options, 326 .show_options = generic_show_options,
298 .umount_begin = v9fs_umount_begin, 327 .umount_begin = v9fs_umount_begin,
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = {
303 .mount = v9fs_mount, 332 .mount = v9fs_mount,
304 .kill_sb = v9fs_kill_super, 333 .kill_sb = v9fs_kill_super,
305 .owner = THIS_MODULE, 334 .owner = THIS_MODULE,
306 .fs_flags = FS_RENAME_DOES_D_MOVE, 335 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT,
307}; 336};
diff --git a/fs/Kconfig b/fs/Kconfig
index 3db9caa57edc..f3aa9b08b228 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,7 +47,7 @@ config FS_POSIX_ACL
47 def_bool n 47 def_bool n
48 48
49config EXPORTFS 49config EXPORTFS
50 tristate 50 bool
51 51
52config FILE_LOCKING 52config FILE_LOCKING
53 bool "Enable POSIX file locking API" if EXPERT 53 bool "Enable POSIX file locking API" if EXPERT
@@ -187,6 +187,7 @@ source "fs/omfs/Kconfig"
187source "fs/hpfs/Kconfig" 187source "fs/hpfs/Kconfig"
188source "fs/qnx4/Kconfig" 188source "fs/qnx4/Kconfig"
189source "fs/romfs/Kconfig" 189source "fs/romfs/Kconfig"
190source "fs/pstore/Kconfig"
190source "fs/sysv/Kconfig" 191source "fs/sysv/Kconfig"
191source "fs/ufs/Kconfig" 192source "fs/ufs/Kconfig"
192source "fs/exofs/Kconfig" 193source "fs/exofs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index a7f7cef0c0c8..fb68c2b8cf8a 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
48obj-$(CONFIG_NFS_COMMON) += nfs_common/ 48obj-$(CONFIG_NFS_COMMON) += nfs_common/
49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o 49obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
50 50
51obj-$(CONFIG_FHANDLE) += fhandle.o
52
51obj-y += quota/ 53obj-y += quota/
52 54
53obj-$(CONFIG_PROC_FS) += proc/ 55obj-$(CONFIG_PROC_FS) += proc/
@@ -121,3 +123,4 @@ obj-$(CONFIG_BTRFS_FS) += btrfs/
121obj-$(CONFIG_GFS2_FS) += gfs2/ 123obj-$(CONFIG_GFS2_FS) += gfs2/
122obj-$(CONFIG_EXOFS_FS) += exofs/ 124obj-$(CONFIG_EXOFS_FS) += exofs/
123obj-$(CONFIG_CEPH_FS) += ceph/ 125obj-$(CONFIG_CEPH_FS) += ceph/
126obj-$(CONFIG_PSTORE) += pstore/
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig
index 1dd5f34b3cf2..e55182a74605 100644
--- a/fs/adfs/Kconfig
+++ b/fs/adfs/Kconfig
@@ -1,7 +1,6 @@
1config ADFS_FS 1config ADFS_FS
2 tristate "ADFS file system support (EXPERIMENTAL)" 2 tristate "ADFS file system support (EXPERIMENTAL)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on BLOCK && EXPERIMENTAL
4 depends on BKL # need to fix
5 help 4 help
6 The Acorn Disc Filing System is the standard file system of the 5 The Acorn Disc Filing System is the standard file system of the
7 RiscOS operating system which runs on Acorn's ARM-based Risc PC 6 RiscOS operating system which runs on Acorn's ARM-based Risc PC
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 2ff622f6f547..718ac1f440c6 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -50,6 +50,7 @@ struct adfs_sb_info {
50 gid_t s_gid; /* owner gid */ 50 gid_t s_gid; /* owner gid */
51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ 51 umode_t s_owner_mask; /* ADFS owner perm -> unix perm */
52 umode_t s_other_mask; /* ADFS other perm -> unix perm */ 52 umode_t s_other_mask; /* ADFS other perm -> unix perm */
53 int s_ftsuffix; /* ,xyz hex filetype suffix option */
53 54
54 __u32 s_ids_per_zone; /* max. no ids in one zone */ 55 __u32 s_ids_per_zone; /* max. no ids in one zone */
55 __u32 s_idlen; /* length of ID in map */ 56 __u32 s_idlen; /* length of ID in map */
@@ -79,6 +80,10 @@ struct adfs_dir {
79 80
80 int nr_buffers; 81 int nr_buffers;
81 struct buffer_head *bh[4]; 82 struct buffer_head *bh[4];
83
84 /* big directories need allocated buffers */
85 struct buffer_head **bh_fplus;
86
82 unsigned int pos; 87 unsigned int pos;
83 unsigned int parent_id; 88 unsigned int parent_id;
84 89
@@ -89,7 +94,7 @@ struct adfs_dir {
89/* 94/*
90 * This is the overall maximum name length 95 * This is the overall maximum name length
91 */ 96 */
92#define ADFS_MAX_NAME_LEN 256 97#define ADFS_MAX_NAME_LEN (256 + 4) /* +4 for ,xyz hex filetype suffix */
93struct object_info { 98struct object_info {
94 __u32 parent_id; /* parent object id */ 99 __u32 parent_id; /* parent object id */
95 __u32 file_id; /* object id */ 100 __u32 file_id; /* object id */
@@ -97,10 +102,26 @@ struct object_info {
97 __u32 execaddr; /* execution address */ 102 __u32 execaddr; /* execution address */
98 __u32 size; /* size */ 103 __u32 size; /* size */
99 __u8 attr; /* RISC OS attributes */ 104 __u8 attr; /* RISC OS attributes */
100 unsigned char name_len; /* name length */ 105 unsigned int name_len; /* name length */
101 char name[ADFS_MAX_NAME_LEN];/* file name */ 106 char name[ADFS_MAX_NAME_LEN];/* file name */
107
108 /* RISC OS file type (12-bit: derived from loadaddr) */
109 __u16 filetype;
102}; 110};
103 111
112/* RISC OS 12-bit filetype converts to ,xyz hex filename suffix */
113static inline int append_filetype_suffix(char *buf, __u16 filetype)
114{
115 if (filetype == 0xffff) /* no explicit 12-bit file type was set */
116 return 0;
117
118 *buf++ = ',';
119 *buf++ = hex_asc_lo(filetype >> 8);
120 *buf++ = hex_asc_lo(filetype >> 4);
121 *buf++ = hex_asc_lo(filetype >> 0);
122 return 4;
123}
124
104struct adfs_dir_ops { 125struct adfs_dir_ops {
105 int (*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir); 126 int (*read)(struct super_block *sb, unsigned int id, unsigned int sz, struct adfs_dir *dir);
106 int (*setpos)(struct adfs_dir *dir, unsigned int fpos); 127 int (*setpos)(struct adfs_dir *dir, unsigned int fpos);
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 3b4a764ed780..3d83075aaa2e 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -9,7 +9,6 @@
9 * 9 *
10 * Common directory handling for ADFS 10 * Common directory handling for ADFS
11 */ 11 */
12#include <linux/smp_lock.h>
13#include "adfs.h" 12#include "adfs.h"
14 13
15/* 14/*
@@ -27,8 +26,6 @@ adfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
27 struct adfs_dir dir; 26 struct adfs_dir dir;
28 int ret = 0; 27 int ret = 0;
29 28
30 lock_kernel();
31
32 if (filp->f_pos >> 32) 29 if (filp->f_pos >> 32)
33 goto out; 30 goto out;
34 31
@@ -70,7 +67,6 @@ free_out:
70 ops->free(&dir); 67 ops->free(&dir);
71 68
72out: 69out:
73 unlock_kernel();
74 return ret; 70 return ret;
75} 71}
76 72
@@ -276,7 +272,6 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
276 struct object_info obj; 272 struct object_info obj;
277 int error; 273 int error;
278 274
279 lock_kernel();
280 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); 275 error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj);
281 if (error == 0) { 276 if (error == 0) {
282 error = -EACCES; 277 error = -EACCES;
@@ -288,7 +283,6 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
288 if (inode) 283 if (inode)
289 error = 0; 284 error = 0;
290 } 285 }
291 unlock_kernel();
292 d_add(dentry, inode); 286 d_add(dentry, inode);
293 return ERR_PTR(error); 287 return ERR_PTR(error);
294} 288}
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index bafc71222e25..4bbe853ee50a 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -52,7 +52,6 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen)
52 *buf++ = *ptr; 52 *buf++ = *ptr;
53 ptr++; 53 ptr++;
54 } 54 }
55 *buf = '\0';
56 55
57 return buf - old_buf; 56 return buf - old_buf;
58} 57}
@@ -208,7 +207,8 @@ release_buffers:
208 * convert a disk-based directory entry to a Linux ADFS directory entry 207 * convert a disk-based directory entry to a Linux ADFS directory entry
209 */ 208 */
210static inline void 209static inline void
211adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de) 210adfs_dir2obj(struct adfs_dir *dir, struct object_info *obj,
211 struct adfs_direntry *de)
212{ 212{
213 obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN); 213 obj->name_len = adfs_readname(obj->name, de->dirobname, ADFS_F_NAME_LEN);
214 obj->file_id = adfs_readval(de->dirinddiscadd, 3); 214 obj->file_id = adfs_readval(de->dirinddiscadd, 3);
@@ -216,6 +216,23 @@ adfs_dir2obj(struct object_info *obj, struct adfs_direntry *de)
216 obj->execaddr = adfs_readval(de->direxec, 4); 216 obj->execaddr = adfs_readval(de->direxec, 4);
217 obj->size = adfs_readval(de->dirlen, 4); 217 obj->size = adfs_readval(de->dirlen, 4);
218 obj->attr = de->newdiratts; 218 obj->attr = de->newdiratts;
219 obj->filetype = -1;
220
221 /*
222 * object is a file and is filetyped and timestamped?
223 * RISC OS 12-bit filetype is stored in load_address[19:8]
224 */
225 if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) &&
226 (0xfff00000 == (0xfff00000 & obj->loadaddr))) {
227 obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8);
228
229 /* optionally append the ,xyz hex filetype suffix */
230 if (ADFS_SB(dir->sb)->s_ftsuffix)
231 obj->name_len +=
232 append_filetype_suffix(
233 &obj->name[obj->name_len],
234 obj->filetype);
235 }
219} 236}
220 237
221/* 238/*
@@ -260,7 +277,7 @@ __adfs_dir_get(struct adfs_dir *dir, int pos, struct object_info *obj)
260 if (!de.dirobname[0]) 277 if (!de.dirobname[0])
261 return -ENOENT; 278 return -ENOENT;
262 279
263 adfs_dir2obj(obj, &de); 280 adfs_dir2obj(dir, obj, &de);
264 281
265 return 0; 282 return 0;
266} 283}
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 1796bb352d05..d9e3bee4e653 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -8,6 +8,7 @@
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
11#include <linux/slab.h>
11#include "adfs.h" 12#include "adfs.h"
12#include "dir_fplus.h" 13#include "dir_fplus.h"
13 14
@@ -22,30 +23,53 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
22 23
23 dir->nr_buffers = 0; 24 dir->nr_buffers = 0;
24 25
26 /* start off using fixed bh set - only alloc for big dirs */
27 dir->bh_fplus = &dir->bh[0];
28
25 block = __adfs_block_map(sb, id, 0); 29 block = __adfs_block_map(sb, id, 0);
26 if (!block) { 30 if (!block) {
27 adfs_error(sb, "dir object %X has a hole at offset 0", id); 31 adfs_error(sb, "dir object %X has a hole at offset 0", id);
28 goto out; 32 goto out;
29 } 33 }
30 34
31 dir->bh[0] = sb_bread(sb, block); 35 dir->bh_fplus[0] = sb_bread(sb, block);
32 if (!dir->bh[0]) 36 if (!dir->bh_fplus[0])
33 goto out; 37 goto out;
34 dir->nr_buffers += 1; 38 dir->nr_buffers += 1;
35 39
36 h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; 40 h = (struct adfs_bigdirheader *)dir->bh_fplus[0]->b_data;
37 size = le32_to_cpu(h->bigdirsize); 41 size = le32_to_cpu(h->bigdirsize);
38 if (size != sz) { 42 if (size != sz) {
39 printk(KERN_WARNING "adfs: adfs_fplus_read: directory header size\n" 43 printk(KERN_WARNING "adfs: adfs_fplus_read:"
40 " does not match directory size\n"); 44 " directory header size %X\n"
45 " does not match directory size %X\n",
46 size, sz);
41 } 47 }
42 48
43 if (h->bigdirversion[0] != 0 || h->bigdirversion[1] != 0 || 49 if (h->bigdirversion[0] != 0 || h->bigdirversion[1] != 0 ||
44 h->bigdirversion[2] != 0 || size & 2047 || 50 h->bigdirversion[2] != 0 || size & 2047 ||
45 h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME)) 51 h->bigdirstartname != cpu_to_le32(BIGDIRSTARTNAME)) {
52 printk(KERN_WARNING "adfs: dir object %X has"
53 " malformed dir header\n", id);
46 goto out; 54 goto out;
55 }
47 56
48 size >>= sb->s_blocksize_bits; 57 size >>= sb->s_blocksize_bits;
58 if (size > sizeof(dir->bh)/sizeof(dir->bh[0])) {
59 /* this directory is too big for fixed bh set, must allocate */
60 struct buffer_head **bh_fplus =
61 kzalloc(size * sizeof(struct buffer_head *),
62 GFP_KERNEL);
63 if (!bh_fplus) {
64 adfs_error(sb, "not enough memory for"
65 " dir object %X (%d blocks)", id, size);
66 goto out;
67 }
68 dir->bh_fplus = bh_fplus;
69 /* copy over the pointer to the block that we've already read */
70 dir->bh_fplus[0] = dir->bh[0];
71 }
72
49 for (blk = 1; blk < size; blk++) { 73 for (blk = 1; blk < size; blk++) {
50 block = __adfs_block_map(sb, id, blk); 74 block = __adfs_block_map(sb, id, blk);
51 if (!block) { 75 if (!block) {
@@ -53,25 +77,44 @@ adfs_fplus_read(struct super_block *sb, unsigned int id, unsigned int sz, struct
53 goto out; 77 goto out;
54 } 78 }
55 79
56 dir->bh[blk] = sb_bread(sb, block); 80 dir->bh_fplus[blk] = sb_bread(sb, block);
57 if (!dir->bh[blk]) 81 if (!dir->bh_fplus[blk]) {
82 adfs_error(sb, "dir object %X failed read for"
83 " offset %d, mapped block %X",
84 id, blk, block);
58 goto out; 85 goto out;
59 dir->nr_buffers = blk; 86 }
87
88 dir->nr_buffers += 1;
60 } 89 }
61 90
62 t = (struct adfs_bigdirtail *)(dir->bh[size - 1]->b_data + (sb->s_blocksize - 8)); 91 t = (struct adfs_bigdirtail *)
92 (dir->bh_fplus[size - 1]->b_data + (sb->s_blocksize - 8));
63 93
64 if (t->bigdirendname != cpu_to_le32(BIGDIRENDNAME) || 94 if (t->bigdirendname != cpu_to_le32(BIGDIRENDNAME) ||
65 t->bigdirendmasseq != h->startmasseq || 95 t->bigdirendmasseq != h->startmasseq ||
66 t->reserved[0] != 0 || t->reserved[1] != 0) 96 t->reserved[0] != 0 || t->reserved[1] != 0) {
97 printk(KERN_WARNING "adfs: dir object %X has "
98 "malformed dir end\n", id);
67 goto out; 99 goto out;
100 }
68 101
69 dir->parent_id = le32_to_cpu(h->bigdirparent); 102 dir->parent_id = le32_to_cpu(h->bigdirparent);
70 dir->sb = sb; 103 dir->sb = sb;
71 return 0; 104 return 0;
105
72out: 106out:
73 for (i = 0; i < dir->nr_buffers; i++) 107 if (dir->bh_fplus) {
74 brelse(dir->bh[i]); 108 for (i = 0; i < dir->nr_buffers; i++)
109 brelse(dir->bh_fplus[i]);
110
111 if (&dir->bh[0] != dir->bh_fplus)
112 kfree(dir->bh_fplus);
113
114 dir->bh_fplus = NULL;
115 }
116
117 dir->nr_buffers = 0;
75 dir->sb = NULL; 118 dir->sb = NULL;
76 return ret; 119 return ret;
77} 120}
@@ -79,7 +122,8 @@ out:
79static int 122static int
80adfs_fplus_setpos(struct adfs_dir *dir, unsigned int fpos) 123adfs_fplus_setpos(struct adfs_dir *dir, unsigned int fpos)
81{ 124{
82 struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; 125 struct adfs_bigdirheader *h =
126 (struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data;
83 int ret = -ENOENT; 127 int ret = -ENOENT;
84 128
85 if (fpos <= le32_to_cpu(h->bigdirentries)) { 129 if (fpos <= le32_to_cpu(h->bigdirentries)) {
@@ -102,21 +146,27 @@ dir_memcpy(struct adfs_dir *dir, unsigned int offset, void *to, int len)
102 partial = sb->s_blocksize - offset; 146 partial = sb->s_blocksize - offset;
103 147
104 if (partial >= len) 148 if (partial >= len)
105 memcpy(to, dir->bh[buffer]->b_data + offset, len); 149 memcpy(to, dir->bh_fplus[buffer]->b_data + offset, len);
106 else { 150 else {
107 char *c = (char *)to; 151 char *c = (char *)to;
108 152
109 remainder = len - partial; 153 remainder = len - partial;
110 154
111 memcpy(c, dir->bh[buffer]->b_data + offset, partial); 155 memcpy(c,
112 memcpy(c + partial, dir->bh[buffer + 1]->b_data, remainder); 156 dir->bh_fplus[buffer]->b_data + offset,
157 partial);
158
159 memcpy(c + partial,
160 dir->bh_fplus[buffer + 1]->b_data,
161 remainder);
113 } 162 }
114} 163}
115 164
116static int 165static int
117adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj) 166adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj)
118{ 167{
119 struct adfs_bigdirheader *h = (struct adfs_bigdirheader *)dir->bh[0]->b_data; 168 struct adfs_bigdirheader *h =
169 (struct adfs_bigdirheader *) dir->bh_fplus[0]->b_data;
120 struct adfs_bigdirentry bde; 170 struct adfs_bigdirentry bde;
121 unsigned int offset; 171 unsigned int offset;
122 int i, ret = -ENOENT; 172 int i, ret = -ENOENT;
@@ -147,6 +197,24 @@ adfs_fplus_getnext(struct adfs_dir *dir, struct object_info *obj)
147 if (obj->name[i] == '/') 197 if (obj->name[i] == '/')
148 obj->name[i] = '.'; 198 obj->name[i] = '.';
149 199
200 obj->filetype = -1;
201
202 /*
203 * object is a file and is filetyped and timestamped?
204 * RISC OS 12-bit filetype is stored in load_address[19:8]
205 */
206 if ((0 == (obj->attr & ADFS_NDA_DIRECTORY)) &&
207 (0xfff00000 == (0xfff00000 & obj->loadaddr))) {
208 obj->filetype = (__u16) ((0x000fff00 & obj->loadaddr) >> 8);
209
210 /* optionally append the ,xyz hex filetype suffix */
211 if (ADFS_SB(dir->sb)->s_ftsuffix)
212 obj->name_len +=
213 append_filetype_suffix(
214 &obj->name[obj->name_len],
215 obj->filetype);
216 }
217
150 dir->pos += 1; 218 dir->pos += 1;
151 ret = 0; 219 ret = 0;
152out: 220out:
@@ -160,7 +228,7 @@ adfs_fplus_sync(struct adfs_dir *dir)
160 int i; 228 int i;
161 229
162 for (i = dir->nr_buffers - 1; i >= 0; i--) { 230 for (i = dir->nr_buffers - 1; i >= 0; i--) {
163 struct buffer_head *bh = dir->bh[i]; 231 struct buffer_head *bh = dir->bh_fplus[i];
164 sync_dirty_buffer(bh); 232 sync_dirty_buffer(bh);
165 if (buffer_req(bh) && !buffer_uptodate(bh)) 233 if (buffer_req(bh) && !buffer_uptodate(bh))
166 err = -EIO; 234 err = -EIO;
@@ -174,8 +242,17 @@ adfs_fplus_free(struct adfs_dir *dir)
174{ 242{
175 int i; 243 int i;
176 244
177 for (i = 0; i < dir->nr_buffers; i++) 245 if (dir->bh_fplus) {
178 brelse(dir->bh[i]); 246 for (i = 0; i < dir->nr_buffers; i++)
247 brelse(dir->bh_fplus[i]);
248
249 if (&dir->bh[0] != dir->bh_fplus)
250 kfree(dir->bh_fplus);
251
252 dir->bh_fplus = NULL;
253 }
254
255 dir->nr_buffers = 0;
179 dir->sb = NULL; 256 dir->sb = NULL;
180} 257}
181 258
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index 65794b8fe79e..92444e94f842 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -7,7 +7,6 @@
7 * it under the terms of the GNU General Public License version 2 as 7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation. 8 * published by the Free Software Foundation.
9 */ 9 */
10#include <linux/smp_lock.h>
11#include <linux/buffer_head.h> 10#include <linux/buffer_head.h>
12#include <linux/writeback.h> 11#include <linux/writeback.h>
13#include "adfs.h" 12#include "adfs.h"
@@ -79,26 +78,13 @@ static const struct address_space_operations adfs_aops = {
79 .bmap = _adfs_bmap 78 .bmap = _adfs_bmap
80}; 79};
81 80
82static inline unsigned int
83adfs_filetype(struct inode *inode)
84{
85 unsigned int type;
86
87 if (ADFS_I(inode)->stamped)
88 type = (ADFS_I(inode)->loadaddr >> 8) & 0xfff;
89 else
90 type = (unsigned int) -1;
91
92 return type;
93}
94
95/* 81/*
96 * Convert ADFS attributes and filetype to Linux permission. 82 * Convert ADFS attributes and filetype to Linux permission.
97 */ 83 */
98static umode_t 84static umode_t
99adfs_atts2mode(struct super_block *sb, struct inode *inode) 85adfs_atts2mode(struct super_block *sb, struct inode *inode)
100{ 86{
101 unsigned int filetype, attr = ADFS_I(inode)->attr; 87 unsigned int attr = ADFS_I(inode)->attr;
102 umode_t mode, rmask; 88 umode_t mode, rmask;
103 struct adfs_sb_info *asb = ADFS_SB(sb); 89 struct adfs_sb_info *asb = ADFS_SB(sb);
104 90
@@ -107,9 +93,7 @@ adfs_atts2mode(struct super_block *sb, struct inode *inode)
107 return S_IFDIR | S_IXUGO | mode; 93 return S_IFDIR | S_IXUGO | mode;
108 } 94 }
109 95
110 filetype = adfs_filetype(inode); 96 switch (ADFS_I(inode)->filetype) {
111
112 switch (filetype) {
113 case 0xfc0: /* LinkFS */ 97 case 0xfc0: /* LinkFS */
114 return S_IFLNK|S_IRWXUGO; 98 return S_IFLNK|S_IRWXUGO;
115 99
@@ -175,50 +159,48 @@ adfs_mode2atts(struct super_block *sb, struct inode *inode)
175 159
176/* 160/*
177 * Convert an ADFS time to Unix time. ADFS has a 40-bit centi-second time 161 * Convert an ADFS time to Unix time. ADFS has a 40-bit centi-second time
178 * referenced to 1 Jan 1900 (til 2248) 162 * referenced to 1 Jan 1900 (til 2248) so we need to discard 2208988800 seconds
163 * of time to convert from RISC OS epoch to Unix epoch.
179 */ 164 */
180static void 165static void
181adfs_adfs2unix_time(struct timespec *tv, struct inode *inode) 166adfs_adfs2unix_time(struct timespec *tv, struct inode *inode)
182{ 167{
183 unsigned int high, low; 168 unsigned int high, low;
169 /* 01 Jan 1970 00:00:00 (Unix epoch) as nanoseconds since
170 * 01 Jan 1900 00:00:00 (RISC OS epoch)
171 */
172 static const s64 nsec_unix_epoch_diff_risc_os_epoch =
173 2208988800000000000LL;
174 s64 nsec;
184 175
185 if (ADFS_I(inode)->stamped == 0) 176 if (ADFS_I(inode)->stamped == 0)
186 goto cur_time; 177 goto cur_time;
187 178
188 high = ADFS_I(inode)->loadaddr << 24; 179 high = ADFS_I(inode)->loadaddr & 0xFF; /* top 8 bits of timestamp */
189 low = ADFS_I(inode)->execaddr; 180 low = ADFS_I(inode)->execaddr; /* bottom 32 bits of timestamp */
190 181
191 high |= low >> 8; 182 /* convert 40-bit centi-seconds to 32-bit seconds
192 low &= 255; 183 * going via nanoseconds to retain precision
184 */
185 nsec = (((s64) high << 32) | (s64) low) * 10000000; /* cs to ns */
193 186
194 /* Files dated pre 01 Jan 1970 00:00:00. */ 187 /* Files dated pre 01 Jan 1970 00:00:00. */
195 if (high < 0x336e996a) 188 if (nsec < nsec_unix_epoch_diff_risc_os_epoch)
196 goto too_early; 189 goto too_early;
197 190
198 /* Files dated post 18 Jan 2038 03:14:05. */ 191 /* convert from RISC OS to Unix epoch */
199 if (high >= 0x656e9969) 192 nsec -= nsec_unix_epoch_diff_risc_os_epoch;
200 goto too_late;
201
202 /* discard 2208988800 (0x336e996a00) seconds of time */
203 high -= 0x336e996a;
204 193
205 /* convert 40-bit centi-seconds to 32-bit seconds */ 194 *tv = ns_to_timespec(nsec);
206 tv->tv_sec = (((high % 100) << 8) + low) / 100 + (high / 100 << 8);
207 tv->tv_nsec = 0;
208 return; 195 return;
209 196
210 cur_time: 197 cur_time:
211 *tv = CURRENT_TIME_SEC; 198 *tv = CURRENT_TIME;
212 return; 199 return;
213 200
214 too_early: 201 too_early:
215 tv->tv_sec = tv->tv_nsec = 0; 202 tv->tv_sec = tv->tv_nsec = 0;
216 return; 203 return;
217
218 too_late:
219 tv->tv_sec = 0x7ffffffd;
220 tv->tv_nsec = 0;
221 return;
222} 204}
223 205
224/* 206/*
@@ -280,7 +262,8 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
280 ADFS_I(inode)->loadaddr = obj->loadaddr; 262 ADFS_I(inode)->loadaddr = obj->loadaddr;
281 ADFS_I(inode)->execaddr = obj->execaddr; 263 ADFS_I(inode)->execaddr = obj->execaddr;
282 ADFS_I(inode)->attr = obj->attr; 264 ADFS_I(inode)->attr = obj->attr;
283 ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000); 265 ADFS_I(inode)->filetype = obj->filetype;
266 ADFS_I(inode)->stamped = ((obj->loadaddr & 0xfff00000) == 0xfff00000);
284 267
285 inode->i_mode = adfs_atts2mode(sb, inode); 268 inode->i_mode = adfs_atts2mode(sb, inode);
286 adfs_adfs2unix_time(&inode->i_mtime, inode); 269 adfs_adfs2unix_time(&inode->i_mtime, inode);
@@ -316,8 +299,6 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
316 unsigned int ia_valid = attr->ia_valid; 299 unsigned int ia_valid = attr->ia_valid;
317 int error; 300 int error;
318 301
319 lock_kernel();
320
321 error = inode_change_ok(inode, attr); 302 error = inode_change_ok(inode, attr);
322 303
323 /* 304 /*
@@ -359,7 +340,6 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr)
359 if (ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MODE)) 340 if (ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MODE))
360 mark_inode_dirty(inode); 341 mark_inode_dirty(inode);
361out: 342out:
362 unlock_kernel();
363 return error; 343 return error;
364} 344}
365 345
@@ -374,7 +354,6 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
374 struct object_info obj; 354 struct object_info obj;
375 int ret; 355 int ret;
376 356
377 lock_kernel();
378 obj.file_id = inode->i_ino; 357 obj.file_id = inode->i_ino;
379 obj.name_len = 0; 358 obj.name_len = 0;
380 obj.parent_id = ADFS_I(inode)->parent_id; 359 obj.parent_id = ADFS_I(inode)->parent_id;
@@ -384,6 +363,5 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc)
384 obj.size = inode->i_size; 363 obj.size = inode->i_size;
385 364
386 ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL); 365 ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL);
387 unlock_kernel();
388 return ret; 366 return ret;
389} 367}
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 2d7954049fbe..c8bf36a1996a 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -14,7 +14,6 @@
14#include <linux/mount.h> 14#include <linux/mount.h>
15#include <linux/seq_file.h> 15#include <linux/seq_file.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/smp_lock.h>
18#include <linux/statfs.h> 17#include <linux/statfs.h>
19#include "adfs.h" 18#include "adfs.h"
20#include "dir_f.h" 19#include "dir_f.h"
@@ -120,15 +119,11 @@ static void adfs_put_super(struct super_block *sb)
120 int i; 119 int i;
121 struct adfs_sb_info *asb = ADFS_SB(sb); 120 struct adfs_sb_info *asb = ADFS_SB(sb);
122 121
123 lock_kernel();
124
125 for (i = 0; i < asb->s_map_size; i++) 122 for (i = 0; i < asb->s_map_size; i++)
126 brelse(asb->s_map[i].dm_bh); 123 brelse(asb->s_map[i].dm_bh);
127 kfree(asb->s_map); 124 kfree(asb->s_map);
128 kfree(asb); 125 kfree(asb);
129 sb->s_fs_info = NULL; 126 sb->s_fs_info = NULL;
130
131 unlock_kernel();
132} 127}
133 128
134static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) 129static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
@@ -143,17 +138,20 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
143 seq_printf(seq, ",ownmask=%o", asb->s_owner_mask); 138 seq_printf(seq, ",ownmask=%o", asb->s_owner_mask);
144 if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK) 139 if (asb->s_other_mask != ADFS_DEFAULT_OTHER_MASK)
145 seq_printf(seq, ",othmask=%o", asb->s_other_mask); 140 seq_printf(seq, ",othmask=%o", asb->s_other_mask);
141 if (asb->s_ftsuffix != 0)
142 seq_printf(seq, ",ftsuffix=%u", asb->s_ftsuffix);
146 143
147 return 0; 144 return 0;
148} 145}
149 146
150enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; 147enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_ftsuffix, Opt_err};
151 148
152static const match_table_t tokens = { 149static const match_table_t tokens = {
153 {Opt_uid, "uid=%u"}, 150 {Opt_uid, "uid=%u"},
154 {Opt_gid, "gid=%u"}, 151 {Opt_gid, "gid=%u"},
155 {Opt_ownmask, "ownmask=%o"}, 152 {Opt_ownmask, "ownmask=%o"},
156 {Opt_othmask, "othmask=%o"}, 153 {Opt_othmask, "othmask=%o"},
154 {Opt_ftsuffix, "ftsuffix=%u"},
157 {Opt_err, NULL} 155 {Opt_err, NULL}
158}; 156};
159 157
@@ -194,6 +192,11 @@ static int parse_options(struct super_block *sb, char *options)
194 return -EINVAL; 192 return -EINVAL;
195 asb->s_other_mask = option; 193 asb->s_other_mask = option;
196 break; 194 break;
195 case Opt_ftsuffix:
196 if (match_int(args, &option))
197 return -EINVAL;
198 asb->s_ftsuffix = option;
199 break;
197 default: 200 default:
198 printk("ADFS-fs: unrecognised mount option \"%s\" " 201 printk("ADFS-fs: unrecognised mount option \"%s\" "
199 "or missing value\n", p); 202 "or missing value\n", p);
@@ -359,15 +362,11 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
359 struct adfs_sb_info *asb; 362 struct adfs_sb_info *asb;
360 struct inode *root; 363 struct inode *root;
361 364
362 lock_kernel();
363
364 sb->s_flags |= MS_NODIRATIME; 365 sb->s_flags |= MS_NODIRATIME;
365 366
366 asb = kzalloc(sizeof(*asb), GFP_KERNEL); 367 asb = kzalloc(sizeof(*asb), GFP_KERNEL);
367 if (!asb) { 368 if (!asb)
368 unlock_kernel();
369 return -ENOMEM; 369 return -ENOMEM;
370 }
371 sb->s_fs_info = asb; 370 sb->s_fs_info = asb;
372 371
373 /* set default options */ 372 /* set default options */
@@ -375,6 +374,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
375 asb->s_gid = 0; 374 asb->s_gid = 0;
376 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK; 375 asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK;
377 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK; 376 asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK;
377 asb->s_ftsuffix = 0;
378 378
379 if (parse_options(sb, data)) 379 if (parse_options(sb, data))
380 goto error; 380 goto error;
@@ -454,11 +454,13 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
454 454
455 root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root); 455 root_obj.parent_id = root_obj.file_id = le32_to_cpu(dr->root);
456 root_obj.name_len = 0; 456 root_obj.name_len = 0;
457 root_obj.loadaddr = 0; 457 /* Set root object date as 01 Jan 1987 00:00:00 */
458 root_obj.execaddr = 0; 458 root_obj.loadaddr = 0xfff0003f;
459 root_obj.execaddr = 0xec22c000;
459 root_obj.size = ADFS_NEWDIR_SIZE; 460 root_obj.size = ADFS_NEWDIR_SIZE;
460 root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ | 461 root_obj.attr = ADFS_NDA_DIRECTORY | ADFS_NDA_OWNER_READ |
461 ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ; 462 ADFS_NDA_OWNER_WRITE | ADFS_NDA_PUBLIC_READ;
463 root_obj.filetype = -1;
462 464
463 /* 465 /*
464 * If this is a F+ disk with variable length directories, 466 * If this is a F+ disk with variable length directories,
@@ -472,6 +474,12 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
472 asb->s_dir = &adfs_f_dir_ops; 474 asb->s_dir = &adfs_f_dir_ops;
473 asb->s_namelen = ADFS_F_NAME_LEN; 475 asb->s_namelen = ADFS_F_NAME_LEN;
474 } 476 }
477 /*
478 * ,xyz hex filetype suffix may be added by driver
479 * to files that have valid RISC OS filetype
480 */
481 if (asb->s_ftsuffix)
482 asb->s_namelen += 4;
475 483
476 sb->s_d_op = &adfs_dentry_operations; 484 sb->s_d_op = &adfs_dentry_operations;
477 root = adfs_iget(sb, &root_obj); 485 root = adfs_iget(sb, &root_obj);
@@ -485,7 +493,6 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent)
485 adfs_error(sb, "get root inode failed\n"); 493 adfs_error(sb, "get root inode failed\n");
486 goto error; 494 goto error;
487 } 495 }
488 unlock_kernel();
489 return 0; 496 return 0;
490 497
491error_free_bh: 498error_free_bh:
@@ -493,7 +500,6 @@ error_free_bh:
493error: 500error:
494 sb->s_fs_info = NULL; 501 sb->s_fs_info = NULL;
495 kfree(asb); 502 kfree(asb);
496 unlock_kernel();
497 return -EINVAL; 503 return -EINVAL;
498} 504}
499 505
diff --git a/fs/affs/Makefile b/fs/affs/Makefile
index b2c4f54446f3..3988b4a78339 100644
--- a/fs/affs/Makefile
+++ b/fs/affs/Makefile
@@ -2,7 +2,7 @@
2# Makefile for the Linux affs filesystem routines. 2# Makefile for the Linux affs filesystem routines.
3# 3#
4 4
5#EXTRA_CFLAGS=-DDEBUG=1 5#ccflags-y := -DDEBUG=1
6 6
7obj-$(CONFIG_AFFS_FS) += affs.o 7obj-$(CONFIG_AFFS_FS) += affs.o
8 8
diff --git a/fs/aio.c b/fs/aio.c
index 26869cde3953..ebb6a22e4e1b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -85,7 +85,7 @@ static int __init aio_setup(void)
85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 85 kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); 86 kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
87 87
88 aio_wq = create_workqueue("aio"); 88 aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); 89 abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
90 BUG_ON(!aio_wq || !abe_pool); 90 BUG_ON(!aio_wq || !abe_pool);
91 91
@@ -520,7 +520,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
520 ctx->reqs_active--; 520 ctx->reqs_active--;
521 521
522 if (unlikely(!ctx->reqs_active && ctx->dead)) 522 if (unlikely(!ctx->reqs_active && ctx->dead))
523 wake_up(&ctx->wait); 523 wake_up_all(&ctx->wait);
524} 524}
525 525
526static void aio_fput_routine(struct work_struct *data) 526static void aio_fput_routine(struct work_struct *data)
@@ -577,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
577 spin_lock(&fput_lock); 577 spin_lock(&fput_lock);
578 list_add(&req->ki_list, &fput_head); 578 list_add(&req->ki_list, &fput_head);
579 spin_unlock(&fput_lock); 579 spin_unlock(&fput_lock);
580 queue_work(aio_wq, &fput_work); 580 schedule_work(&fput_work);
581 } else { 581 } else {
582 req->ki_filp = NULL; 582 req->ki_filp = NULL;
583 really_put_req(ctx, req); 583 really_put_req(ctx, req);
@@ -1229,7 +1229,7 @@ static void io_destroy(struct kioctx *ioctx)
1229 * by other CPUs at this point. Right now, we rely on the 1229 * by other CPUs at this point. Right now, we rely on the
1230 * locking done by the above calls to ensure this consistency. 1230 * locking done by the above calls to ensure this consistency.
1231 */ 1231 */
1232 wake_up(&ioctx->wait); 1232 wake_up_all(&ioctx->wait);
1233 put_ioctx(ioctx); /* once for the lookup */ 1233 put_ioctx(ioctx); /* once for the lookup */
1234} 1234}
1235 1235
diff --git a/fs/attr.c b/fs/attr.c
index 7ca41811afa1..1007ed616314 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -59,7 +59,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
59 59
60 /* Make sure a caller can chmod. */ 60 /* Make sure a caller can chmod. */
61 if (ia_valid & ATTR_MODE) { 61 if (ia_valid & ATTR_MODE) {
62 if (!is_owner_or_cap(inode)) 62 if (!inode_owner_or_capable(inode))
63 return -EPERM; 63 return -EPERM;
64 /* Also check the setgid bit! */ 64 /* Also check the setgid bit! */
65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 65 if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
@@ -69,7 +69,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
69 69
70 /* Check for setting the inode time. */ 70 /* Check for setting the inode time. */
71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { 71 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
72 if (!is_owner_or_cap(inode)) 72 if (!inode_owner_or_capable(inode))
73 return -EPERM; 73 return -EPERM;
74 } 74 }
75 75
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 014e7aba3b08..e6f84d26f4cf 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -36,7 +36,7 @@ static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
36static int autofs4_dir_open(struct inode *inode, struct file *file); 36static int autofs4_dir_open(struct inode *inode, struct file *file);
37static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 37static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
38static struct vfsmount *autofs4_d_automount(struct path *); 38static struct vfsmount *autofs4_d_automount(struct path *);
39static int autofs4_d_manage(struct dentry *, bool, bool); 39static int autofs4_d_manage(struct dentry *, bool);
40static void autofs4_dentry_release(struct dentry *); 40static void autofs4_dentry_release(struct dentry *);
41 41
42const struct file_operations autofs4_root_operations = { 42const struct file_operations autofs4_root_operations = {
@@ -446,7 +446,7 @@ done:
446 return NULL; 446 return NULL;
447} 447}
448 448
449int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk) 449int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
450{ 450{
451 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 451 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
452 452
@@ -454,7 +454,7 @@ int autofs4_d_manage(struct dentry *dentry, bool mounting_here, bool rcu_walk)
454 dentry, dentry->d_name.len, dentry->d_name.name); 454 dentry, dentry->d_name.len, dentry->d_name.name);
455 455
456 /* The daemon never waits. */ 456 /* The daemon never waits. */
457 if (autofs4_oz_mode(sbi) || mounting_here) { 457 if (autofs4_oz_mode(sbi)) {
458 if (!d_mountpoint(dentry)) 458 if (!d_mountpoint(dentry))
459 return -EISDIR; 459 return -EISDIR;
460 return 0; 460 return 0;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 685ecff3ab31..b14cebfd9047 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -97,7 +97,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
97 if (!inode) 97 if (!inode)
98 return -ENOSPC; 98 return -ENOSPC;
99 mutex_lock(&info->bfs_lock); 99 mutex_lock(&info->bfs_lock);
100 ino = find_first_zero_bit(info->si_imap, info->si_lasti); 100 ino = find_first_zero_bit(info->si_imap, info->si_lasti + 1);
101 if (ino > info->si_lasti) { 101 if (ino > info->si_lasti) {
102 mutex_unlock(&info->bfs_lock); 102 mutex_unlock(&info->bfs_lock);
103 iput(inode); 103 iput(inode);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d5b640ba6cb1..f34078d702d3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -570,7 +570,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
570 unsigned long elf_entry; 570 unsigned long elf_entry;
571 unsigned long interp_load_addr = 0; 571 unsigned long interp_load_addr = 0;
572 unsigned long start_code, end_code, start_data, end_data; 572 unsigned long start_code, end_code, start_data, end_data;
573 unsigned long reloc_func_desc = 0; 573 unsigned long reloc_func_desc __maybe_unused = 0;
574 int executable_stack = EXSTACK_DEFAULT; 574 int executable_stack = EXSTACK_DEFAULT;
575 unsigned long def_flags = 0; 575 unsigned long def_flags = 0;
576 struct { 576 struct {
@@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm)
1906 segs = current->mm->map_count; 1906 segs = current->mm->map_count;
1907 segs += elf_core_extra_phdrs(); 1907 segs += elf_core_extra_phdrs();
1908 1908
1909 gate_vma = get_gate_vma(current); 1909 gate_vma = get_gate_vma(current->mm);
1910 if (gate_vma != NULL) 1910 if (gate_vma != NULL)
1911 segs++; 1911 segs++;
1912 1912
diff --git a/fs/bio.c b/fs/bio.c
index 4bd454fa844e..4cf2a52fbc54 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -111,7 +111,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
111 if (!slab) 111 if (!slab)
112 goto out_unlock; 112 goto out_unlock;
113 113
114 printk("bio: create slab <%s> at %d\n", bslab->name, entry); 114 printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
115 bslab->slab = slab; 115 bslab->slab = slab;
116 bslab->slab_ref = 1; 116 bslab->slab_ref = 1;
117 bslab->slab_size = sz; 117 bslab->slab_size = sz;
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9c949348510b..de34bfad9ec3 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -170,7 +170,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
170 int ret; 170 int ret;
171 struct posix_acl *acl = NULL; 171 struct posix_acl *acl = NULL;
172 172
173 if (!is_owner_or_cap(dentry->d_inode)) 173 if (!inode_owner_or_capable(dentry->d_inode))
174 return -EPERM; 174 return -EPERM;
175 175
176 if (!IS_POSIXACL(dentry->d_inode)) 176 if (!IS_POSIXACL(dentry->d_inode))
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6f820fa23df4..7f78cc78fdd0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -729,6 +729,15 @@ struct btrfs_space_info {
729 u64 disk_total; /* total bytes on disk, takes mirrors into 729 u64 disk_total; /* total bytes on disk, takes mirrors into
730 account */ 730 account */
731 731
732 /*
733 * we bump reservation progress every time we decrement
734 * bytes_reserved. This way people waiting for reservations
735 * know something good has happened and they can check
736 * for progress. The number here isn't to be trusted, it
737 * just shows reclaim activity
738 */
739 unsigned long reservation_progress;
740
732 int full; /* indicates that we cannot allocate any more 741 int full; /* indicates that we cannot allocate any more
733 chunks for this space */ 742 chunks for this space */
734 int force_alloc; /* set if we need to force a chunk alloc for 743 int force_alloc; /* set if we need to force a chunk alloc for
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e1aa8d607bc7..100b07f021b4 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2493,7 +2493,7 @@ int close_ctree(struct btrfs_root *root)
2493 * ERROR state on disk. 2493 * ERROR state on disk.
2494 * 2494 *
2495 * 2. when btrfs flips readonly just in btrfs_commit_super, 2495 * 2. when btrfs flips readonly just in btrfs_commit_super,
2496 * and in such case, btrfs cannnot write sb via btrfs_commit_super, 2496 * and in such case, btrfs cannot write sb via btrfs_commit_super,
2497 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag, 2497 * and since fs_state has been set BTRFS_SUPER_FLAG_ERROR flag,
2498 * btrfs will cleanup all FS resources first and write sb then. 2498 * btrfs will cleanup all FS resources first and write sb then.
2499 */ 2499 */
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ff27d7a477b2..b4ffad859adb 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
21 int len = *max_len; 21 int len = *max_len;
22 int type; 22 int type;
23 23
24 if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || 24 if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
25 (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) 25 *max_len = BTRFS_FID_SIZE_CONNECTABLE;
26 return 255; 26 return 255;
27 } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
28 *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 return 255;
30 }
27 31
28 len = BTRFS_FID_SIZE_NON_CONNECTABLE; 32 len = BTRFS_FID_SIZE_NON_CONNECTABLE;
29 type = FILEID_BTRFS_WITHOUT_PARENT; 33 type = FILEID_BTRFS_WITHOUT_PARENT;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 588ff9849873..7b3089b5c2df 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3342,15 +3342,16 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3342 u64 max_reclaim; 3342 u64 max_reclaim;
3343 u64 reclaimed = 0; 3343 u64 reclaimed = 0;
3344 long time_left; 3344 long time_left;
3345 int pause = 1;
3346 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3345 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3347 int loops = 0; 3346 int loops = 0;
3347 unsigned long progress;
3348 3348
3349 block_rsv = &root->fs_info->delalloc_block_rsv; 3349 block_rsv = &root->fs_info->delalloc_block_rsv;
3350 space_info = block_rsv->space_info; 3350 space_info = block_rsv->space_info;
3351 3351
3352 smp_mb(); 3352 smp_mb();
3353 reserved = space_info->bytes_reserved; 3353 reserved = space_info->bytes_reserved;
3354 progress = space_info->reservation_progress;
3354 3355
3355 if (reserved == 0) 3356 if (reserved == 0)
3356 return 0; 3357 return 0;
@@ -3365,31 +3366,36 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3365 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3366 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3366 3367
3367 spin_lock(&space_info->lock); 3368 spin_lock(&space_info->lock);
3368 if (reserved > space_info->bytes_reserved) { 3369 if (reserved > space_info->bytes_reserved)
3369 loops = 0;
3370 reclaimed += reserved - space_info->bytes_reserved; 3370 reclaimed += reserved - space_info->bytes_reserved;
3371 } else {
3372 loops++;
3373 }
3374 reserved = space_info->bytes_reserved; 3371 reserved = space_info->bytes_reserved;
3375 spin_unlock(&space_info->lock); 3372 spin_unlock(&space_info->lock);
3376 3373
3374 loops++;
3375
3377 if (reserved == 0 || reclaimed >= max_reclaim) 3376 if (reserved == 0 || reclaimed >= max_reclaim)
3378 break; 3377 break;
3379 3378
3380 if (trans && trans->transaction->blocked) 3379 if (trans && trans->transaction->blocked)
3381 return -EAGAIN; 3380 return -EAGAIN;
3382 3381
3383 __set_current_state(TASK_INTERRUPTIBLE); 3382 time_left = schedule_timeout_interruptible(1);
3384 time_left = schedule_timeout(pause);
3385 3383
3386 /* We were interrupted, exit */ 3384 /* We were interrupted, exit */
3387 if (time_left) 3385 if (time_left)
3388 break; 3386 break;
3389 3387
3390 pause <<= 1; 3388 /* we've kicked the IO a few times, if anything has been freed,
3391 if (pause > HZ / 10) 3389 * exit. There is no sense in looping here for a long time
3392 pause = HZ / 10; 3390 * when we really need to commit the transaction, or there are
3391 * just too many writers without enough free space
3392 */
3393
3394 if (loops > 3) {
3395 smp_mb();
3396 if (progress != space_info->reservation_progress)
3397 break;
3398 }
3393 3399
3394 } 3400 }
3395 return reclaimed >= to_reclaim; 3401 return reclaimed >= to_reclaim;
@@ -3612,6 +3618,7 @@ void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
3612 if (num_bytes) { 3618 if (num_bytes) {
3613 spin_lock(&space_info->lock); 3619 spin_lock(&space_info->lock);
3614 space_info->bytes_reserved -= num_bytes; 3620 space_info->bytes_reserved -= num_bytes;
3621 space_info->reservation_progress++;
3615 spin_unlock(&space_info->lock); 3622 spin_unlock(&space_info->lock);
3616 } 3623 }
3617 } 3624 }
@@ -3844,6 +3851,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3844 if (block_rsv->reserved >= block_rsv->size) { 3851 if (block_rsv->reserved >= block_rsv->size) {
3845 num_bytes = block_rsv->reserved - block_rsv->size; 3852 num_bytes = block_rsv->reserved - block_rsv->size;
3846 sinfo->bytes_reserved -= num_bytes; 3853 sinfo->bytes_reserved -= num_bytes;
3854 sinfo->reservation_progress++;
3847 block_rsv->reserved = block_rsv->size; 3855 block_rsv->reserved = block_rsv->size;
3848 block_rsv->full = 1; 3856 block_rsv->full = 1;
3849 } 3857 }
@@ -4005,7 +4013,6 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4005 to_reserve = 0; 4013 to_reserve = 0;
4006 } 4014 }
4007 spin_unlock(&BTRFS_I(inode)->accounting_lock); 4015 spin_unlock(&BTRFS_I(inode)->accounting_lock);
4008
4009 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4016 to_reserve += calc_csum_metadata_size(inode, num_bytes);
4010 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4017 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
4011 if (ret) 4018 if (ret)
@@ -4133,6 +4140,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4133 btrfs_set_block_group_used(&cache->item, old_val); 4140 btrfs_set_block_group_used(&cache->item, old_val);
4134 cache->reserved -= num_bytes; 4141 cache->reserved -= num_bytes;
4135 cache->space_info->bytes_reserved -= num_bytes; 4142 cache->space_info->bytes_reserved -= num_bytes;
4143 cache->space_info->reservation_progress++;
4136 cache->space_info->bytes_used += num_bytes; 4144 cache->space_info->bytes_used += num_bytes;
4137 cache->space_info->disk_used += num_bytes * factor; 4145 cache->space_info->disk_used += num_bytes * factor;
4138 spin_unlock(&cache->lock); 4146 spin_unlock(&cache->lock);
@@ -4184,6 +4192,7 @@ static int pin_down_extent(struct btrfs_root *root,
4184 if (reserved) { 4192 if (reserved) {
4185 cache->reserved -= num_bytes; 4193 cache->reserved -= num_bytes;
4186 cache->space_info->bytes_reserved -= num_bytes; 4194 cache->space_info->bytes_reserved -= num_bytes;
4195 cache->space_info->reservation_progress++;
4187 } 4196 }
4188 spin_unlock(&cache->lock); 4197 spin_unlock(&cache->lock);
4189 spin_unlock(&cache->space_info->lock); 4198 spin_unlock(&cache->space_info->lock);
@@ -4234,6 +4243,7 @@ static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
4234 space_info->bytes_readonly += num_bytes; 4243 space_info->bytes_readonly += num_bytes;
4235 cache->reserved -= num_bytes; 4244 cache->reserved -= num_bytes;
4236 space_info->bytes_reserved -= num_bytes; 4245 space_info->bytes_reserved -= num_bytes;
4246 space_info->reservation_progress++;
4237 } 4247 }
4238 spin_unlock(&cache->lock); 4248 spin_unlock(&cache->lock);
4239 spin_unlock(&space_info->lock); 4249 spin_unlock(&space_info->lock);
@@ -4712,6 +4722,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
4712 if (ret) { 4722 if (ret) {
4713 spin_lock(&cache->space_info->lock); 4723 spin_lock(&cache->space_info->lock);
4714 cache->space_info->bytes_reserved -= buf->len; 4724 cache->space_info->bytes_reserved -= buf->len;
4725 cache->space_info->reservation_progress++;
4715 spin_unlock(&cache->space_info->lock); 4726 spin_unlock(&cache->space_info->lock);
4716 } 4727 }
4717 goto out; 4728 goto out;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd3f172e94e6..714adc4ac4c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3046,17 +3046,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3046 } 3046 }
3047 3047
3048 while (!end) { 3048 while (!end) {
3049 off = extent_map_end(em); 3049 u64 offset_in_extent;
3050 if (off >= max) 3050
3051 end = 1; 3051 /* break if the extent we found is outside the range */
3052 if (em->start >= max || extent_map_end(em) < off)
3053 break;
3054
3055 /*
3056 * get_extent may return an extent that starts before our
3057 * requested range. We have to make sure the ranges
3058 * we return to fiemap always move forward and don't
3059 * overlap, so adjust the offsets here
3060 */
3061 em_start = max(em->start, off);
3052 3062
3053 em_start = em->start; 3063 /*
3054 em_len = em->len; 3064 * record the offset from the start of the extent
3065 * for adjusting the disk offset below
3066 */
3067 offset_in_extent = em_start - em->start;
3055 em_end = extent_map_end(em); 3068 em_end = extent_map_end(em);
3069 em_len = em_end - em_start;
3056 emflags = em->flags; 3070 emflags = em->flags;
3057 disko = 0; 3071 disko = 0;
3058 flags = 0; 3072 flags = 0;
3059 3073
3074 /*
3075 * bump off for our next call to get_extent
3076 */
3077 off = extent_map_end(em);
3078 if (off >= max)
3079 end = 1;
3080
3060 if (em->block_start == EXTENT_MAP_LAST_BYTE) { 3081 if (em->block_start == EXTENT_MAP_LAST_BYTE) {
3061 end = 1; 3082 end = 1;
3062 flags |= FIEMAP_EXTENT_LAST; 3083 flags |= FIEMAP_EXTENT_LAST;
@@ -3067,7 +3088,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3067 flags |= (FIEMAP_EXTENT_DELALLOC | 3088 flags |= (FIEMAP_EXTENT_DELALLOC |
3068 FIEMAP_EXTENT_UNKNOWN); 3089 FIEMAP_EXTENT_UNKNOWN);
3069 } else { 3090 } else {
3070 disko = em->block_start; 3091 disko = em->block_start + offset_in_extent;
3071 } 3092 }
3072 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 3093 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
3073 flags |= FIEMAP_EXTENT_ENCODED; 3094 flags |= FIEMAP_EXTENT_ENCODED;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7084140d5940..f447b783bb84 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -70,6 +70,19 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
70 70
71 /* Flush processor's dcache for this page */ 71 /* Flush processor's dcache for this page */
72 flush_dcache_page(page); 72 flush_dcache_page(page);
73
74 /*
75 * if we get a partial write, we can end up with
76 * partially up to date pages. These add
77 * a lot of complexity, so make sure they don't
78 * happen by forcing this copy to be retried.
79 *
80 * The rest of the btrfs_file_write code will fall
81 * back to page at a time copies after we return 0.
82 */
83 if (!PageUptodate(page) && copied < count)
84 copied = 0;
85
73 iov_iter_advance(i, copied); 86 iov_iter_advance(i, copied);
74 write_bytes -= copied; 87 write_bytes -= copied;
75 total_copied += copied; 88 total_copied += copied;
@@ -763,6 +776,27 @@ out:
763} 776}
764 777
765/* 778/*
779 * on error we return an unlocked page and the error value
780 * on success we return a locked page and 0
781 */
782static int prepare_uptodate_page(struct page *page, u64 pos)
783{
784 int ret = 0;
785
786 if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
787 ret = btrfs_readpage(NULL, page);
788 if (ret)
789 return ret;
790 lock_page(page);
791 if (!PageUptodate(page)) {
792 unlock_page(page);
793 return -EIO;
794 }
795 }
796 return 0;
797}
798
799/*
766 * this gets pages into the page cache and locks them down, it also properly 800 * this gets pages into the page cache and locks them down, it also properly
767 * waits for data=ordered extents to finish before allowing the pages to be 801 * waits for data=ordered extents to finish before allowing the pages to be
768 * modified. 802 * modified.
@@ -777,6 +811,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
777 unsigned long index = pos >> PAGE_CACHE_SHIFT; 811 unsigned long index = pos >> PAGE_CACHE_SHIFT;
778 struct inode *inode = fdentry(file)->d_inode; 812 struct inode *inode = fdentry(file)->d_inode;
779 int err = 0; 813 int err = 0;
814 int faili = 0;
780 u64 start_pos; 815 u64 start_pos;
781 u64 last_pos; 816 u64 last_pos;
782 817
@@ -794,15 +829,24 @@ again:
794 for (i = 0; i < num_pages; i++) { 829 for (i = 0; i < num_pages; i++) {
795 pages[i] = grab_cache_page(inode->i_mapping, index + i); 830 pages[i] = grab_cache_page(inode->i_mapping, index + i);
796 if (!pages[i]) { 831 if (!pages[i]) {
797 int c; 832 faili = i - 1;
798 for (c = i - 1; c >= 0; c--) { 833 err = -ENOMEM;
799 unlock_page(pages[c]); 834 goto fail;
800 page_cache_release(pages[c]); 835 }
801 } 836
802 return -ENOMEM; 837 if (i == 0)
838 err = prepare_uptodate_page(pages[i], pos);
839 if (i == num_pages - 1)
840 err = prepare_uptodate_page(pages[i],
841 pos + write_bytes);
842 if (err) {
843 page_cache_release(pages[i]);
844 faili = i - 1;
845 goto fail;
803 } 846 }
804 wait_on_page_writeback(pages[i]); 847 wait_on_page_writeback(pages[i]);
805 } 848 }
849 err = 0;
806 if (start_pos < inode->i_size) { 850 if (start_pos < inode->i_size) {
807 struct btrfs_ordered_extent *ordered; 851 struct btrfs_ordered_extent *ordered;
808 lock_extent_bits(&BTRFS_I(inode)->io_tree, 852 lock_extent_bits(&BTRFS_I(inode)->io_tree,
@@ -842,6 +886,14 @@ again:
842 WARN_ON(!PageLocked(pages[i])); 886 WARN_ON(!PageLocked(pages[i]));
843 } 887 }
844 return 0; 888 return 0;
889fail:
890 while (faili >= 0) {
891 unlock_page(pages[faili]);
892 page_cache_release(pages[faili]);
893 faili--;
894 }
895 return err;
896
845} 897}
846 898
847static ssize_t btrfs_file_aio_write(struct kiocb *iocb, 899static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
@@ -851,7 +903,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
851 struct file *file = iocb->ki_filp; 903 struct file *file = iocb->ki_filp;
852 struct inode *inode = fdentry(file)->d_inode; 904 struct inode *inode = fdentry(file)->d_inode;
853 struct btrfs_root *root = BTRFS_I(inode)->root; 905 struct btrfs_root *root = BTRFS_I(inode)->root;
854 struct page *pinned[2];
855 struct page **pages = NULL; 906 struct page **pages = NULL;
856 struct iov_iter i; 907 struct iov_iter i;
857 loff_t *ppos = &iocb->ki_pos; 908 loff_t *ppos = &iocb->ki_pos;
@@ -872,9 +923,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
872 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 923 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
873 (file->f_flags & O_DIRECT)); 924 (file->f_flags & O_DIRECT));
874 925
875 pinned[0] = NULL;
876 pinned[1] = NULL;
877
878 start_pos = pos; 926 start_pos = pos;
879 927
880 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); 928 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
@@ -962,32 +1010,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
962 first_index = pos >> PAGE_CACHE_SHIFT; 1010 first_index = pos >> PAGE_CACHE_SHIFT;
963 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; 1011 last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
964 1012
965 /*
966 * there are lots of better ways to do this, but this code
967 * makes sure the first and last page in the file range are
968 * up to date and ready for cow
969 */
970 if ((pos & (PAGE_CACHE_SIZE - 1))) {
971 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
972 if (!PageUptodate(pinned[0])) {
973 ret = btrfs_readpage(NULL, pinned[0]);
974 BUG_ON(ret);
975 wait_on_page_locked(pinned[0]);
976 } else {
977 unlock_page(pinned[0]);
978 }
979 }
980 if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
981 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
982 if (!PageUptodate(pinned[1])) {
983 ret = btrfs_readpage(NULL, pinned[1]);
984 BUG_ON(ret);
985 wait_on_page_locked(pinned[1]);
986 } else {
987 unlock_page(pinned[1]);
988 }
989 }
990
991 while (iov_iter_count(&i) > 0) { 1013 while (iov_iter_count(&i) > 0) {
992 size_t offset = pos & (PAGE_CACHE_SIZE - 1); 1014 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
993 size_t write_bytes = min(iov_iter_count(&i), 1015 size_t write_bytes = min(iov_iter_count(&i),
@@ -1024,8 +1046,20 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1024 1046
1025 copied = btrfs_copy_from_user(pos, num_pages, 1047 copied = btrfs_copy_from_user(pos, num_pages,
1026 write_bytes, pages, &i); 1048 write_bytes, pages, &i);
1027 dirty_pages = (copied + offset + PAGE_CACHE_SIZE - 1) >> 1049
1028 PAGE_CACHE_SHIFT; 1050 /*
1051 * if we have trouble faulting in the pages, fall
1052 * back to one page at a time
1053 */
1054 if (copied < write_bytes)
1055 nrptrs = 1;
1056
1057 if (copied == 0)
1058 dirty_pages = 0;
1059 else
1060 dirty_pages = (copied + offset +
1061 PAGE_CACHE_SIZE - 1) >>
1062 PAGE_CACHE_SHIFT;
1029 1063
1030 if (num_pages > dirty_pages) { 1064 if (num_pages > dirty_pages) {
1031 if (copied > 0) 1065 if (copied > 0)
@@ -1069,10 +1103,6 @@ out:
1069 err = ret; 1103 err = ret;
1070 1104
1071 kfree(pages); 1105 kfree(pages);
1072 if (pinned[0])
1073 page_cache_release(pinned[0]);
1074 if (pinned[1])
1075 page_cache_release(pinned[1]);
1076 *ppos = pos; 1106 *ppos = pos;
1077 1107
1078 /* 1108 /*
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 0efdb65953c5..512c3d1da083 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -90,13 +90,14 @@ static noinline int cow_file_range(struct inode *inode,
90 unsigned long *nr_written, int unlock); 90 unsigned long *nr_written, int unlock);
91 91
92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 92static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
93 struct inode *inode, struct inode *dir) 93 struct inode *inode, struct inode *dir,
94 const struct qstr *qstr)
94{ 95{
95 int err; 96 int err;
96 97
97 err = btrfs_init_acl(trans, inode, dir); 98 err = btrfs_init_acl(trans, inode, dir);
98 if (!err) 99 if (!err)
99 err = btrfs_xattr_security_init(trans, inode, dir); 100 err = btrfs_xattr_security_init(trans, inode, dir, qstr);
100 return err; 101 return err;
101} 102}
102 103
@@ -4704,7 +4705,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
4704 if (IS_ERR(inode)) 4705 if (IS_ERR(inode))
4705 goto out_unlock; 4706 goto out_unlock;
4706 4707
4707 err = btrfs_init_inode_security(trans, inode, dir); 4708 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4708 if (err) { 4709 if (err) {
4709 drop_inode = 1; 4710 drop_inode = 1;
4710 goto out_unlock; 4711 goto out_unlock;
@@ -4765,7 +4766,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
4765 if (IS_ERR(inode)) 4766 if (IS_ERR(inode))
4766 goto out_unlock; 4767 goto out_unlock;
4767 4768
4768 err = btrfs_init_inode_security(trans, inode, dir); 4769 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4769 if (err) { 4770 if (err) {
4770 drop_inode = 1; 4771 drop_inode = 1;
4771 goto out_unlock; 4772 goto out_unlock;
@@ -4806,9 +4807,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4806 int err; 4807 int err;
4807 int drop_inode = 0; 4808 int drop_inode = 0;
4808 4809
4809 if (inode->i_nlink == 0)
4810 return -ENOENT;
4811
4812 /* do not allow sys_link's with other subvols of the same device */ 4810 /* do not allow sys_link's with other subvols of the same device */
4813 if (root->objectid != BTRFS_I(inode)->root->objectid) 4811 if (root->objectid != BTRFS_I(inode)->root->objectid)
4814 return -EPERM; 4812 return -EPERM;
@@ -4821,10 +4819,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4821 goto fail; 4819 goto fail;
4822 4820
4823 /* 4821 /*
4824 * 1 item for inode ref 4822 * 2 items for inode and inode ref
4825 * 2 items for dir items 4823 * 2 items for dir items
4824 * 1 item for parent inode
4826 */ 4825 */
4827 trans = btrfs_start_transaction(root, 3); 4826 trans = btrfs_start_transaction(root, 5);
4828 if (IS_ERR(trans)) { 4827 if (IS_ERR(trans)) {
4829 err = PTR_ERR(trans); 4828 err = PTR_ERR(trans);
4830 goto fail; 4829 goto fail;
@@ -4893,7 +4892,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4893 4892
4894 drop_on_err = 1; 4893 drop_on_err = 1;
4895 4894
4896 err = btrfs_init_inode_security(trans, inode, dir); 4895 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
4897 if (err) 4896 if (err)
4898 goto out_fail; 4897 goto out_fail;
4899 4898
@@ -6056,6 +6055,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6056 if (!skip_sum) { 6055 if (!skip_sum) {
6057 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); 6056 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6058 if (!dip->csums) { 6057 if (!dip->csums) {
6058 kfree(dip);
6059 ret = -ENOMEM; 6059 ret = -ENOMEM;
6060 goto free_ordered; 6060 goto free_ordered;
6061 } 6061 }
@@ -7104,7 +7104,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
7104 if (IS_ERR(inode)) 7104 if (IS_ERR(inode))
7105 goto out_unlock; 7105 goto out_unlock;
7106 7106
7107 err = btrfs_init_inode_security(trans, inode, dir); 7107 err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
7108 if (err) { 7108 if (err) {
7109 drop_inode = 1; 7109 drop_inode = 1;
7110 goto out_unlock; 7110 goto out_unlock;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5fdb2abc4fa7..d1bace3df9b6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -158,7 +158,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
158 FS_SYNC_FL | FS_DIRSYNC_FL)) 158 FS_SYNC_FL | FS_DIRSYNC_FL))
159 return -EOPNOTSUPP; 159 return -EOPNOTSUPP;
160 160
161 if (!is_owner_or_cap(inode)) 161 if (!inode_owner_or_capable(inode))
162 return -EACCES; 162 return -EACCES;
163 163
164 mutex_lock(&inode->i_mutex); 164 mutex_lock(&inode->i_mutex);
@@ -1077,7 +1077,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
1077 if (flags & ~BTRFS_SUBVOL_RDONLY) 1077 if (flags & ~BTRFS_SUBVOL_RDONLY)
1078 return -EOPNOTSUPP; 1078 return -EOPNOTSUPP;
1079 1079
1080 if (!is_owner_or_cap(inode)) 1080 if (!inode_owner_or_capable(inode))
1081 return -EACCES; 1081 return -EACCES;
1082 1082
1083 down_write(&root->fs_info->subvol_sem); 1083 down_write(&root->fs_info->subvol_sem);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5776531dc2b..d779cefcfd7d 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -370,7 +370,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
370} 370}
371 371
372int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, 372int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
373 struct inode *inode, struct inode *dir) 373 struct inode *inode, struct inode *dir,
374 const struct qstr *qstr)
374{ 375{
375 int err; 376 int err;
376 size_t len; 377 size_t len;
@@ -378,7 +379,8 @@ int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
378 char *suffix; 379 char *suffix;
379 char *name; 380 char *name;
380 381
381 err = security_inode_init_security(inode, dir, &suffix, &value, &len); 382 err = security_inode_init_security(inode, dir, qstr, &suffix, &value,
383 &len);
382 if (err) { 384 if (err) {
383 if (err == -EOPNOTSUPP) 385 if (err == -EOPNOTSUPP)
384 return 0; 386 return 0;
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index 7a43fd640bbb..b3cc8039134b 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -37,6 +37,7 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name,
37extern int btrfs_removexattr(struct dentry *dentry, const char *name); 37extern int btrfs_removexattr(struct dentry *dentry, const char *name);
38 38
39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans, 39extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
40 struct inode *inode, struct inode *dir); 40 struct inode *inode, struct inode *dir,
41 const struct qstr *qstr);
41 42
42#endif /* __XATTR__ */ 43#endif /* __XATTR__ */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index f5ec2d44150d..faccd47c6c46 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -57,7 +57,8 @@ static struct list_head *zlib_alloc_workspace(void)
57 if (!workspace) 57 if (!workspace)
58 return ERR_PTR(-ENOMEM); 58 return ERR_PTR(-ENOMEM);
59 59
60 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 60 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize(
61 MAX_WBITS, MAX_MEM_LEVEL));
61 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 62 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
62 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 63 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
63 if (!workspace->def_strm.workspace || 64 if (!workspace->def_strm.workspace ||
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 42c7fafc8bfe..a0358c2189cb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -275,6 +275,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
275 bool preemptive) 275 bool preemptive)
276{ 276{
277 struct dentry *grave, *trap; 277 struct dentry *grave, *trap;
278 struct path path, path_to_graveyard;
278 char nbuffer[8 + 8 + 1]; 279 char nbuffer[8 + 8 + 1];
279 int ret; 280 int ret;
280 281
@@ -287,10 +288,18 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
287 /* non-directories can just be unlinked */ 288 /* non-directories can just be unlinked */
288 if (!S_ISDIR(rep->d_inode->i_mode)) { 289 if (!S_ISDIR(rep->d_inode->i_mode)) {
289 _debug("unlink stale object"); 290 _debug("unlink stale object");
290 ret = vfs_unlink(dir->d_inode, rep);
291 291
292 if (preemptive) 292 path.mnt = cache->mnt;
293 cachefiles_mark_object_buried(cache, rep); 293 path.dentry = dir;
294 ret = security_path_unlink(&path, rep);
295 if (ret < 0) {
296 cachefiles_io_error(cache, "Unlink security error");
297 } else {
298 ret = vfs_unlink(dir->d_inode, rep);
299
300 if (preemptive)
301 cachefiles_mark_object_buried(cache, rep);
302 }
294 303
295 mutex_unlock(&dir->d_inode->i_mutex); 304 mutex_unlock(&dir->d_inode->i_mutex);
296 305
@@ -379,12 +388,23 @@ try_again:
379 } 388 }
380 389
381 /* attempt the rename */ 390 /* attempt the rename */
382 ret = vfs_rename(dir->d_inode, rep, cache->graveyard->d_inode, grave); 391 path.mnt = cache->mnt;
383 if (ret != 0 && ret != -ENOMEM) 392 path.dentry = dir;
384 cachefiles_io_error(cache, "Rename failed with error %d", ret); 393 path_to_graveyard.mnt = cache->mnt;
394 path_to_graveyard.dentry = cache->graveyard;
395 ret = security_path_rename(&path, rep, &path_to_graveyard, grave);
396 if (ret < 0) {
397 cachefiles_io_error(cache, "Rename security error %d", ret);
398 } else {
399 ret = vfs_rename(dir->d_inode, rep,
400 cache->graveyard->d_inode, grave);
401 if (ret != 0 && ret != -ENOMEM)
402 cachefiles_io_error(cache,
403 "Rename failed with error %d", ret);
385 404
386 if (preemptive) 405 if (preemptive)
387 cachefiles_mark_object_buried(cache, rep); 406 cachefiles_mark_object_buried(cache, rep);
407 }
388 408
389 unlock_rename(cache->graveyard, dir); 409 unlock_rename(cache->graveyard, dir);
390 dput(grave); 410 dput(grave);
@@ -448,6 +468,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
448{ 468{
449 struct cachefiles_cache *cache; 469 struct cachefiles_cache *cache;
450 struct dentry *dir, *next = NULL; 470 struct dentry *dir, *next = NULL;
471 struct path path;
451 unsigned long start; 472 unsigned long start;
452 const char *name; 473 const char *name;
453 int ret, nlen; 474 int ret, nlen;
@@ -458,6 +479,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
458 479
459 cache = container_of(parent->fscache.cache, 480 cache = container_of(parent->fscache.cache,
460 struct cachefiles_cache, cache); 481 struct cachefiles_cache, cache);
482 path.mnt = cache->mnt;
461 483
462 ASSERT(parent->dentry); 484 ASSERT(parent->dentry);
463 ASSERT(parent->dentry->d_inode); 485 ASSERT(parent->dentry->d_inode);
@@ -511,6 +533,10 @@ lookup_again:
511 if (ret < 0) 533 if (ret < 0)
512 goto create_error; 534 goto create_error;
513 535
536 path.dentry = dir;
537 ret = security_path_mkdir(&path, next, 0);
538 if (ret < 0)
539 goto create_error;
514 start = jiffies; 540 start = jiffies;
515 ret = vfs_mkdir(dir->d_inode, next, 0); 541 ret = vfs_mkdir(dir->d_inode, next, 0);
516 cachefiles_hist(cachefiles_mkdir_histogram, start); 542 cachefiles_hist(cachefiles_mkdir_histogram, start);
@@ -536,6 +562,10 @@ lookup_again:
536 if (ret < 0) 562 if (ret < 0)
537 goto create_error; 563 goto create_error;
538 564
565 path.dentry = dir;
566 ret = security_path_mknod(&path, next, S_IFREG, 0);
567 if (ret < 0)
568 goto create_error;
539 start = jiffies; 569 start = jiffies;
540 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); 570 ret = vfs_create(dir->d_inode, next, S_IFREG, NULL);
541 cachefiles_hist(cachefiles_create_histogram, start); 571 cachefiles_hist(cachefiles_create_histogram, start);
@@ -692,6 +722,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
692{ 722{
693 struct dentry *subdir; 723 struct dentry *subdir;
694 unsigned long start; 724 unsigned long start;
725 struct path path;
695 int ret; 726 int ret;
696 727
697 _enter(",,%s", dirname); 728 _enter(",,%s", dirname);
@@ -719,6 +750,11 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
719 750
720 _debug("attempt mkdir"); 751 _debug("attempt mkdir");
721 752
753 path.mnt = cache->mnt;
754 path.dentry = dir;
755 ret = security_path_mkdir(&path, subdir, 0700);
756 if (ret < 0)
757 goto mkdir_error;
722 ret = vfs_mkdir(dir->d_inode, subdir, 0700); 758 ret = vfs_mkdir(dir->d_inode, subdir, 0700);
723 if (ret < 0) 759 if (ret < 0)
724 goto mkdir_error; 760 goto mkdir_error;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 08f65faac112..0dba6915712b 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -210,8 +210,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
210 if (!fsc->debugfs_congestion_kb) 210 if (!fsc->debugfs_congestion_kb)
211 goto out; 211 goto out;
212 212
213 dout("a\n");
214
215 snprintf(name, sizeof(name), "../../bdi/%s", 213 snprintf(name, sizeof(name), "../../bdi/%s",
216 dev_name(fsc->backing_dev_info.dev)); 214 dev_name(fsc->backing_dev_info.dev));
217 fsc->debugfs_bdi = 215 fsc->debugfs_bdi =
@@ -221,7 +219,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
221 if (!fsc->debugfs_bdi) 219 if (!fsc->debugfs_bdi)
222 goto out; 220 goto out;
223 221
224 dout("b\n");
225 fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", 222 fsc->debugfs_mdsmap = debugfs_create_file("mdsmap",
226 0600, 223 0600,
227 fsc->client->debugfs_dir, 224 fsc->client->debugfs_dir,
@@ -230,7 +227,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
230 if (!fsc->debugfs_mdsmap) 227 if (!fsc->debugfs_mdsmap)
231 goto out; 228 goto out;
232 229
233 dout("ca\n");
234 fsc->debugfs_mdsc = debugfs_create_file("mdsc", 230 fsc->debugfs_mdsc = debugfs_create_file("mdsc",
235 0600, 231 0600,
236 fsc->client->debugfs_dir, 232 fsc->client->debugfs_dir,
@@ -239,7 +235,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
239 if (!fsc->debugfs_mdsc) 235 if (!fsc->debugfs_mdsc)
240 goto out; 236 goto out;
241 237
242 dout("da\n");
243 fsc->debugfs_caps = debugfs_create_file("caps", 238 fsc->debugfs_caps = debugfs_create_file("caps",
244 0400, 239 0400,
245 fsc->client->debugfs_dir, 240 fsc->client->debugfs_dir,
@@ -248,7 +243,6 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
248 if (!fsc->debugfs_caps) 243 if (!fsc->debugfs_caps)
249 goto out; 244 goto out;
250 245
251 dout("ea\n");
252 fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", 246 fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru",
253 0600, 247 0600,
254 fsc->client->debugfs_dir, 248 fsc->client->debugfs_dir,
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f0aef787a102..1a867a3601ae 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -60,7 +60,6 @@ int ceph_init_dentry(struct dentry *dentry)
60 } 60 }
61 di->dentry = dentry; 61 di->dentry = dentry;
62 di->lease_session = NULL; 62 di->lease_session = NULL;
63 di->parent_inode = igrab(dentry->d_parent->d_inode);
64 dentry->d_fsdata = di; 63 dentry->d_fsdata = di;
65 dentry->d_time = jiffies; 64 dentry->d_time = jiffies;
66 ceph_dentry_lru_add(dentry); 65 ceph_dentry_lru_add(dentry);
@@ -162,7 +161,7 @@ more:
162 filp->f_pos = di->offset; 161 filp->f_pos = di->offset;
163 err = filldir(dirent, dentry->d_name.name, 162 err = filldir(dirent, dentry->d_name.name,
164 dentry->d_name.len, di->offset, 163 dentry->d_name.len, di->offset,
165 dentry->d_inode->i_ino, 164 ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
166 dentry->d_inode->i_mode >> 12); 165 dentry->d_inode->i_mode >> 12);
167 166
168 if (last) { 167 if (last) {
@@ -246,15 +245,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
246 245
247 dout("readdir off 0 -> '.'\n"); 246 dout("readdir off 0 -> '.'\n");
248 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), 247 if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0),
249 inode->i_ino, inode->i_mode >> 12) < 0) 248 ceph_translate_ino(inode->i_sb, inode->i_ino),
249 inode->i_mode >> 12) < 0)
250 return 0; 250 return 0;
251 filp->f_pos = 1; 251 filp->f_pos = 1;
252 off = 1; 252 off = 1;
253 } 253 }
254 if (filp->f_pos == 1) { 254 if (filp->f_pos == 1) {
255 ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
255 dout("readdir off 1 -> '..'\n"); 256 dout("readdir off 1 -> '..'\n");
256 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), 257 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
257 filp->f_dentry->d_parent->d_inode->i_ino, 258 ceph_translate_ino(inode->i_sb, ino),
258 inode->i_mode >> 12) < 0) 259 inode->i_mode >> 12) < 0)
259 return 0; 260 return 0;
260 filp->f_pos = 2; 261 filp->f_pos = 2;
@@ -378,7 +379,8 @@ more:
378 if (filldir(dirent, 379 if (filldir(dirent,
379 rinfo->dir_dname[off - fi->offset], 380 rinfo->dir_dname[off - fi->offset],
380 rinfo->dir_dname_len[off - fi->offset], 381 rinfo->dir_dname_len[off - fi->offset],
381 pos, ino, ftype) < 0) { 382 pos,
383 ceph_translate_ino(inode->i_sb, ino), ftype) < 0) {
382 dout("filldir stopping us...\n"); 384 dout("filldir stopping us...\n");
383 return 0; 385 return 0;
384 } 386 }
@@ -410,7 +412,7 @@ more:
410 spin_lock(&inode->i_lock); 412 spin_lock(&inode->i_lock);
411 if (ci->i_release_count == fi->dir_release_count) { 413 if (ci->i_release_count == fi->dir_release_count) {
412 dout(" marking %p complete\n", inode); 414 dout(" marking %p complete\n", inode);
413 ci->i_ceph_flags |= CEPH_I_COMPLETE; 415 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
414 ci->i_max_offset = filp->f_pos; 416 ci->i_max_offset = filp->f_pos;
415 } 417 }
416 spin_unlock(&inode->i_lock); 418 spin_unlock(&inode->i_lock);
@@ -497,6 +499,7 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
497 499
498 /* .snap dir? */ 500 /* .snap dir? */
499 if (err == -ENOENT && 501 if (err == -ENOENT &&
502 ceph_snap(parent) == CEPH_NOSNAP &&
500 strcmp(dentry->d_name.name, 503 strcmp(dentry->d_name.name,
501 fsc->mount_options->snapdir_name) == 0) { 504 fsc->mount_options->snapdir_name) == 0) {
502 struct inode *inode = ceph_get_snapdir(parent); 505 struct inode *inode = ceph_get_snapdir(parent);
@@ -993,7 +996,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
993{ 996{
994 struct inode *dir; 997 struct inode *dir;
995 998
996 if (nd->flags & LOOKUP_RCU) 999 if (nd && nd->flags & LOOKUP_RCU)
997 return -ECHILD; 1000 return -ECHILD;
998 1001
999 dir = dentry->d_parent->d_inode; 1002 dir = dentry->d_parent->d_inode;
@@ -1024,34 +1027,13 @@ out_touch:
1024} 1027}
1025 1028
1026/* 1029/*
1027 * When a dentry is released, clear the dir I_COMPLETE if it was part 1030 * Release our ceph_dentry_info.
1028 * of the current dir gen or if this is in the snapshot namespace.
1029 */ 1031 */
1030static void ceph_dentry_release(struct dentry *dentry) 1032static void ceph_d_release(struct dentry *dentry)
1031{ 1033{
1032 struct ceph_dentry_info *di = ceph_dentry(dentry); 1034 struct ceph_dentry_info *di = ceph_dentry(dentry);
1033 struct inode *parent_inode = NULL;
1034 u64 snapid = CEPH_NOSNAP;
1035 1035
1036 if (!IS_ROOT(dentry)) { 1036 dout("d_release %p\n", dentry);
1037 parent_inode = di->parent_inode;
1038 if (parent_inode)
1039 snapid = ceph_snap(parent_inode);
1040 }
1041 dout("dentry_release %p parent %p\n", dentry, parent_inode);
1042 if (parent_inode && snapid != CEPH_SNAPDIR) {
1043 struct ceph_inode_info *ci = ceph_inode(parent_inode);
1044
1045 spin_lock(&parent_inode->i_lock);
1046 if (ci->i_shared_gen == di->lease_shared_gen ||
1047 snapid <= CEPH_MAXSNAP) {
1048 dout(" clearing %p complete (d_release)\n",
1049 parent_inode);
1050 ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
1051 ci->i_release_count++;
1052 }
1053 spin_unlock(&parent_inode->i_lock);
1054 }
1055 if (di) { 1037 if (di) {
1056 ceph_dentry_lru_del(dentry); 1038 ceph_dentry_lru_del(dentry);
1057 if (di->lease_session) 1039 if (di->lease_session)
@@ -1059,8 +1041,6 @@ static void ceph_dentry_release(struct dentry *dentry)
1059 kmem_cache_free(ceph_dentry_cachep, di); 1041 kmem_cache_free(ceph_dentry_cachep, di);
1060 dentry->d_fsdata = NULL; 1042 dentry->d_fsdata = NULL;
1061 } 1043 }
1062 if (parent_inode)
1063 iput(parent_inode);
1064} 1044}
1065 1045
1066static int ceph_snapdir_d_revalidate(struct dentry *dentry, 1046static int ceph_snapdir_d_revalidate(struct dentry *dentry,
@@ -1278,14 +1258,14 @@ const struct inode_operations ceph_dir_iops = {
1278 1258
1279const struct dentry_operations ceph_dentry_ops = { 1259const struct dentry_operations ceph_dentry_ops = {
1280 .d_revalidate = ceph_d_revalidate, 1260 .d_revalidate = ceph_d_revalidate,
1281 .d_release = ceph_dentry_release, 1261 .d_release = ceph_d_release,
1282}; 1262};
1283 1263
1284const struct dentry_operations ceph_snapdir_dentry_ops = { 1264const struct dentry_operations ceph_snapdir_dentry_ops = {
1285 .d_revalidate = ceph_snapdir_d_revalidate, 1265 .d_revalidate = ceph_snapdir_d_revalidate,
1286 .d_release = ceph_dentry_release, 1266 .d_release = ceph_d_release,
1287}; 1267};
1288 1268
1289const struct dentry_operations ceph_snap_dentry_ops = { 1269const struct dentry_operations ceph_snap_dentry_ops = {
1290 .d_release = ceph_dentry_release, 1270 .d_release = ceph_d_release,
1291}; 1271};
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 7d0e4a82d898..159b512d5a27 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -564,11 +564,19 @@ more:
564 * start_request so that a tid has been assigned. 564 * start_request so that a tid has been assigned.
565 */ 565 */
566 spin_lock(&ci->i_unsafe_lock); 566 spin_lock(&ci->i_unsafe_lock);
567 list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); 567 list_add_tail(&req->r_unsafe_item,
568 &ci->i_unsafe_writes);
568 spin_unlock(&ci->i_unsafe_lock); 569 spin_unlock(&ci->i_unsafe_lock);
569 ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); 570 ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
570 } 571 }
572
571 ret = ceph_osdc_wait_request(&fsc->client->osdc, req); 573 ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
574 if (ret < 0 && req->r_safe_callback) {
575 spin_lock(&ci->i_unsafe_lock);
576 list_del_init(&req->r_unsafe_item);
577 spin_unlock(&ci->i_unsafe_lock);
578 ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
579 }
572 } 580 }
573 581
574 if (file->f_flags & O_DIRECT) 582 if (file->f_flags & O_DIRECT)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5625463aa479..b54c97da1c43 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -36,6 +36,13 @@ static void ceph_vmtruncate_work(struct work_struct *work);
36/* 36/*
37 * find or create an inode, given the ceph ino number 37 * find or create an inode, given the ceph ino number
38 */ 38 */
39static int ceph_set_ino_cb(struct inode *inode, void *data)
40{
41 ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
42 inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
43 return 0;
44}
45
39struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) 46struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
40{ 47{
41 struct inode *inode; 48 struct inode *inode;
@@ -707,7 +714,7 @@ static int fill_inode(struct inode *inode,
707 (issued & CEPH_CAP_FILE_EXCL) == 0 && 714 (issued & CEPH_CAP_FILE_EXCL) == 0 &&
708 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { 715 (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
709 dout(" marking %p complete (empty)\n", inode); 716 dout(" marking %p complete (empty)\n", inode);
710 ci->i_ceph_flags |= CEPH_I_COMPLETE; 717 /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
711 ci->i_max_offset = 2; 718 ci->i_max_offset = 2;
712 } 719 }
713 break; 720 break;
@@ -1030,9 +1037,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1030 dout("fill_trace doing d_move %p -> %p\n", 1037 dout("fill_trace doing d_move %p -> %p\n",
1031 req->r_old_dentry, dn); 1038 req->r_old_dentry, dn);
1032 1039
1033 /* d_move screws up d_subdirs order */
1034 ceph_i_clear(dir, CEPH_I_COMPLETE);
1035
1036 d_move(req->r_old_dentry, dn); 1040 d_move(req->r_old_dentry, dn);
1037 dout(" src %p '%.*s' dst %p '%.*s'\n", 1041 dout(" src %p '%.*s' dst %p '%.*s'\n",
1038 req->r_old_dentry, 1042 req->r_old_dentry,
@@ -1044,12 +1048,15 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
1044 rehashing bug in vfs_rename_dir */ 1048 rehashing bug in vfs_rename_dir */
1045 ceph_invalidate_dentry_lease(dn); 1049 ceph_invalidate_dentry_lease(dn);
1046 1050
1047 /* take overwritten dentry's readdir offset */ 1051 /*
1048 dout("dn %p gets %p offset %lld (old offset %lld)\n", 1052 * d_move() puts the renamed dentry at the end of
1049 req->r_old_dentry, dn, ceph_dentry(dn)->offset, 1053 * d_subdirs. We need to assign it an appropriate
1054 * directory offset so we can behave when holding
1055 * I_COMPLETE.
1056 */
1057 ceph_set_dentry_offset(req->r_old_dentry);
1058 dout("dn %p gets new offset %lld\n", req->r_old_dentry,
1050 ceph_dentry(req->r_old_dentry)->offset); 1059 ceph_dentry(req->r_old_dentry)->offset);
1051 ceph_dentry(req->r_old_dentry)->offset =
1052 ceph_dentry(dn)->offset;
1053 1060
1054 dn = req->r_old_dentry; /* use old_dentry */ 1061 dn = req->r_old_dentry; /* use old_dentry */
1055 in = dn->d_inode; 1062 in = dn->d_inode;
@@ -1809,7 +1816,7 @@ int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
1809 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL); 1816 err = ceph_do_getattr(inode, CEPH_STAT_CAP_INODE_ALL);
1810 if (!err) { 1817 if (!err) {
1811 generic_fillattr(inode, stat); 1818 generic_fillattr(inode, stat);
1812 stat->ino = inode->i_ino; 1819 stat->ino = ceph_translate_ino(inode->i_sb, inode->i_ino);
1813 if (ceph_snap(inode) != CEPH_NOSNAP) 1820 if (ceph_snap(inode) != CEPH_NOSNAP)
1814 stat->dev = ceph_snap(inode); 1821 stat->dev = ceph_snap(inode);
1815 else 1822 else
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9c5085465a63..a9e78b4a258c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -131,6 +131,7 @@ enum {
131 Opt_rbytes, 131 Opt_rbytes,
132 Opt_norbytes, 132 Opt_norbytes,
133 Opt_noasyncreaddir, 133 Opt_noasyncreaddir,
134 Opt_ino32,
134}; 135};
135 136
136static match_table_t fsopt_tokens = { 137static match_table_t fsopt_tokens = {
@@ -150,6 +151,7 @@ static match_table_t fsopt_tokens = {
150 {Opt_rbytes, "rbytes"}, 151 {Opt_rbytes, "rbytes"},
151 {Opt_norbytes, "norbytes"}, 152 {Opt_norbytes, "norbytes"},
152 {Opt_noasyncreaddir, "noasyncreaddir"}, 153 {Opt_noasyncreaddir, "noasyncreaddir"},
154 {Opt_ino32, "ino32"},
153 {-1, NULL} 155 {-1, NULL}
154}; 156};
155 157
@@ -225,6 +227,9 @@ static int parse_fsopt_token(char *c, void *private)
225 case Opt_noasyncreaddir: 227 case Opt_noasyncreaddir:
226 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; 228 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
227 break; 229 break;
230 case Opt_ino32:
231 fsopt->flags |= CEPH_MOUNT_OPT_INO32;
232 break;
228 default: 233 default:
229 BUG_ON(token); 234 BUG_ON(token);
230 } 235 }
@@ -288,7 +293,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
288 fsopt->sb_flags = flags; 293 fsopt->sb_flags = flags;
289 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; 294 fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
290 295
291 fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; 296 fsopt->rsize = CEPH_RSIZE_DEFAULT;
292 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); 297 fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
293 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; 298 fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
294 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; 299 fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
@@ -370,7 +375,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
370 375
371 if (fsopt->wsize) 376 if (fsopt->wsize)
372 seq_printf(m, ",wsize=%d", fsopt->wsize); 377 seq_printf(m, ",wsize=%d", fsopt->wsize);
373 if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) 378 if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
374 seq_printf(m, ",rsize=%d", fsopt->rsize); 379 seq_printf(m, ",rsize=%d", fsopt->rsize);
375 if (fsopt->congestion_kb != default_congestion_kb()) 380 if (fsopt->congestion_kb != default_congestion_kb())
376 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); 381 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 88fcaa21b801..619fe719968f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -27,6 +27,7 @@
27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ 27#define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */
28#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ 28#define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */
29#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ 29#define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */
30#define CEPH_MOUNT_OPT_INO32 (1<<8) /* 32 bit inos */
30 31
31#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) 32#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES)
32 33
@@ -35,6 +36,7 @@
35#define ceph_test_mount_opt(fsc, opt) \ 36#define ceph_test_mount_opt(fsc, opt) \
36 (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) 37 (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
37 38
39#define CEPH_RSIZE_DEFAULT (512*1024) /* readahead */
38#define CEPH_MAX_READDIR_DEFAULT 1024 40#define CEPH_MAX_READDIR_DEFAULT 1024
39#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) 41#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
40#define CEPH_SNAPDIRNAME_DEFAULT ".snap" 42#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -207,7 +209,6 @@ struct ceph_dentry_info {
207 struct dentry *dentry; 209 struct dentry *dentry;
208 u64 time; 210 u64 time;
209 u64 offset; 211 u64 offset;
210 struct inode *parent_inode;
211}; 212};
212 213
213struct ceph_inode_xattrs_info { 214struct ceph_inode_xattrs_info {
@@ -320,6 +321,16 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
320 return container_of(inode, struct ceph_inode_info, vfs_inode); 321 return container_of(inode, struct ceph_inode_info, vfs_inode);
321} 322}
322 323
324static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
325{
326 return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
327}
328
329static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
330{
331 return (struct ceph_fs_client *)sb->s_fs_info;
332}
333
323static inline struct ceph_vino ceph_vino(struct inode *inode) 334static inline struct ceph_vino ceph_vino(struct inode *inode)
324{ 335{
325 return ceph_inode(inode)->i_vino; 336 return ceph_inode(inode)->i_vino;
@@ -328,19 +339,49 @@ static inline struct ceph_vino ceph_vino(struct inode *inode)
328/* 339/*
329 * ino_t is <64 bits on many architectures, blech. 340 * ino_t is <64 bits on many architectures, blech.
330 * 341 *
331 * don't include snap in ino hash, at least for now. 342 * i_ino (kernel inode) st_ino (userspace)
343 * i386 32 32
344 * x86_64+ino32 64 32
345 * x86_64 64 64
346 */
347static inline u32 ceph_ino_to_ino32(ino_t ino)
348{
349 ino ^= ino >> (sizeof(ino) * 8 - 32);
350 if (!ino)
351 ino = 1;
352 return ino;
353}
354
355/*
356 * kernel i_ino value
332 */ 357 */
333static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) 358static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
334{ 359{
335 ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ 360 ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */
336#if BITS_PER_LONG == 32 361#if BITS_PER_LONG == 32
337 ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; 362 ino = ceph_ino_to_ino32(ino);
338 if (!ino)
339 ino = 1;
340#endif 363#endif
341 return ino; 364 return ino;
342} 365}
343 366
367/*
368 * user-visible ino (stat, filldir)
369 */
370#if BITS_PER_LONG == 32
371static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
372{
373 return ino;
374}
375#else
376static inline ino_t ceph_translate_ino(struct super_block *sb, ino_t ino)
377{
378 if (ceph_test_mount_opt(ceph_sb_to_client(sb), INO32))
379 ino = ceph_ino_to_ino32(ino);
380 return ino;
381}
382#endif
383
384
344/* for printf-style formatting */ 385/* for printf-style formatting */
345#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap 386#define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap
346 387
@@ -429,13 +470,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
429 return ((loff_t)frag << 32) | (loff_t)off; 470 return ((loff_t)frag << 32) | (loff_t)off;
430} 471}
431 472
432static inline int ceph_set_ino_cb(struct inode *inode, void *data)
433{
434 ceph_inode(inode)->i_vino = *(struct ceph_vino *)data;
435 inode->i_ino = ceph_vino_to_ino(*(struct ceph_vino *)data);
436 return 0;
437}
438
439/* 473/*
440 * caps helpers 474 * caps helpers
441 */ 475 */
@@ -504,15 +538,6 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
504 int *total, int *avail, int *used, 538 int *total, int *avail, int *used,
505 int *reserved, int *min); 539 int *reserved, int *min);
506 540
507static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode)
508{
509 return (struct ceph_fs_client *)inode->i_sb->s_fs_info;
510}
511
512static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb)
513{
514 return (struct ceph_fs_client *)sb->s_fs_info;
515}
516 541
517 542
518/* 543/*
diff --git a/fs/coda/Makefile b/fs/coda/Makefile
index 6c22e61da397..1bab69a0d347 100644
--- a/fs/coda/Makefile
+++ b/fs/coda/Makefile
@@ -9,4 +9,4 @@ coda-objs := psdev.o cache.o cnode.o inode.o dir.o file.o upcall.o \
9 9
10# If you want debugging output, please uncomment the following line. 10# If you want debugging output, please uncomment the following line.
11 11
12# EXTRA_CFLAGS += -DDEBUG -DDEBUG_SMB_MALLOC=1 12# ccflags-y := -DDEBUG -DDEBUG_SMB_MALLOC=1
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index c6405ce3c50e..06d27a41807f 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -13,7 +13,6 @@
13 13
14#ifdef CONFIG_SYSCTL 14#ifdef CONFIG_SYSCTL
15static struct ctl_table_header *fs_table_header; 15static struct ctl_table_header *fs_table_header;
16#endif
17 16
18static ctl_table coda_table[] = { 17static ctl_table coda_table[] = {
19 { 18 {
@@ -40,7 +39,6 @@ static ctl_table coda_table[] = {
40 {} 39 {}
41}; 40};
42 41
43#ifdef CONFIG_SYSCTL
44static ctl_table fs_table[] = { 42static ctl_table fs_table[] = {
45 { 43 {
46 .procname = "coda", 44 .procname = "coda",
@@ -49,22 +47,18 @@ static ctl_table fs_table[] = {
49 }, 47 },
50 {} 48 {}
51}; 49};
52#endif
53 50
54void coda_sysctl_init(void) 51void coda_sysctl_init(void)
55{ 52{
56#ifdef CONFIG_SYSCTL
57 if ( !fs_table_header ) 53 if ( !fs_table_header )
58 fs_table_header = register_sysctl_table(fs_table); 54 fs_table_header = register_sysctl_table(fs_table);
59#endif
60} 55}
61 56
62void coda_sysctl_clean(void) 57void coda_sysctl_clean(void)
63{ 58{
64#ifdef CONFIG_SYSCTL
65 if ( fs_table_header ) { 59 if ( fs_table_header ) {
66 unregister_sysctl_table(fs_table_header); 60 unregister_sysctl_table(fs_table_header);
67 fs_table_header = NULL; 61 fs_table_header = NULL;
68 } 62 }
69#endif
70} 63}
64#endif
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6cc..72fe6cda9108 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
262 */ 262 */
263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) 263asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
264{ 264{
265 struct path path; 265 struct kstatfs tmp;
266 int error; 266 int error = user_statfs(pathname, &tmp);
267 267 if (!error)
268 error = user_path(pathname, &path); 268 error = put_compat_statfs(buf, &tmp);
269 if (!error) {
270 struct kstatfs tmp;
271 error = vfs_statfs(&path, &tmp);
272 if (!error)
273 error = put_compat_statfs(buf, &tmp);
274 path_put(&path);
275 }
276 return error; 269 return error;
277} 270}
278 271
279asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) 272asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
280{ 273{
281 struct file * file;
282 struct kstatfs tmp; 274 struct kstatfs tmp;
283 int error; 275 int error = fd_statfs(fd, &tmp);
284
285 error = -EBADF;
286 file = fget(fd);
287 if (!file)
288 goto out;
289 error = vfs_statfs(&file->f_path, &tmp);
290 if (!error) 276 if (!error)
291 error = put_compat_statfs(buf, &tmp); 277 error = put_compat_statfs(buf, &tmp);
292 fput(file);
293out:
294 return error; 278 return error;
295} 279}
296 280
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
329 313
330asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) 314asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
331{ 315{
332 struct path path; 316 struct kstatfs tmp;
333 int error; 317 int error;
334 318
335 if (sz != sizeof(*buf)) 319 if (sz != sizeof(*buf))
336 return -EINVAL; 320 return -EINVAL;
337 321
338 error = user_path(pathname, &path); 322 error = user_statfs(pathname, &tmp);
339 if (!error) { 323 if (!error)
340 struct kstatfs tmp; 324 error = put_compat_statfs64(buf, &tmp);
341 error = vfs_statfs(&path, &tmp);
342 if (!error)
343 error = put_compat_statfs64(buf, &tmp);
344 path_put(&path);
345 }
346 return error; 325 return error;
347} 326}
348 327
349asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) 328asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
350{ 329{
351 struct file * file;
352 struct kstatfs tmp; 330 struct kstatfs tmp;
353 int error; 331 int error;
354 332
355 if (sz != sizeof(*buf)) 333 if (sz != sizeof(*buf))
356 return -EINVAL; 334 return -EINVAL;
357 335
358 error = -EBADF; 336 error = fd_statfs(fd, &tmp);
359 file = fget(fd);
360 if (!file)
361 goto out;
362 error = vfs_statfs(&file->f_path, &tmp);
363 if (!error) 337 if (!error)
364 error = put_compat_statfs64(buf, &tmp); 338 error = put_compat_statfs64(buf, &tmp);
365 fput(file);
366out:
367 return error; 339 return error;
368} 340}
369 341
@@ -1228,7 +1200,9 @@ compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1228 file = fget_light(fd, &fput_needed); 1200 file = fget_light(fd, &fput_needed);
1229 if (!file) 1201 if (!file)
1230 return -EBADF; 1202 return -EBADF;
1231 ret = compat_readv(file, vec, vlen, &pos); 1203 ret = -ESPIPE;
1204 if (file->f_mode & FMODE_PREAD)
1205 ret = compat_readv(file, vec, vlen, &pos);
1232 fput_light(file, fput_needed); 1206 fput_light(file, fput_needed);
1233 return ret; 1207 return ret;
1234} 1208}
@@ -1285,7 +1259,9 @@ compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1285 file = fget_light(fd, &fput_needed); 1259 file = fget_light(fd, &fput_needed);
1286 if (!file) 1260 if (!file)
1287 return -EBADF; 1261 return -EBADF;
1288 ret = compat_writev(file, vec, vlen, &pos); 1262 ret = -ESPIPE;
1263 if (file->f_mode & FMODE_PWRITE)
1264 ret = compat_writev(file, vec, vlen, &pos);
1289 fput_light(file, fput_needed); 1265 fput_light(file, fput_needed);
1290 return ret; 1266 return ret;
1291} 1267}
@@ -1695,9 +1671,6 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1695 * Update: ERESTARTSYS breaks at least the xview clock binary, so 1671 * Update: ERESTARTSYS breaks at least the xview clock binary, so
1696 * I'm trying ERESTARTNOHAND which restart only when you want to. 1672 * I'm trying ERESTARTNOHAND which restart only when you want to.
1697 */ 1673 */
1698#define MAX_SELECT_SECONDS \
1699 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1700
1701int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1674int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1702 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1675 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1703 struct timespec *end_time) 1676 struct timespec *end_time)
@@ -2308,3 +2281,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd,
2308} 2281}
2309 2282
2310#endif /* CONFIG_TIMERFD */ 2283#endif /* CONFIG_TIMERFD */
2284
2285#ifdef CONFIG_FHANDLE
2286/*
2287 * Exactly like fs/open.c:sys_open_by_handle_at(), except that it
2288 * doesn't set the O_LARGEFILE flag.
2289 */
2290asmlinkage long
2291compat_sys_open_by_handle_at(int mountdirfd,
2292 struct file_handle __user *handle, int flags)
2293{
2294 return do_handle_open(mountdirfd, handle, flags);
2295}
2296#endif
diff --git a/fs/dcache.c b/fs/dcache.c
index 2a6bd9a4ae97..ad25c4cec7d5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
296 __releases(parent->d_lock) 296 __releases(parent->d_lock)
297 __releases(dentry->d_inode->i_lock) 297 __releases(dentry->d_inode->i_lock)
298{ 298{
299 dentry->d_parent = NULL;
300 list_del(&dentry->d_u.d_child); 299 list_del(&dentry->d_u.d_child);
300 /*
301 * Inform try_to_ascend() that we are no longer attached to the
302 * dentry tree
303 */
304 dentry->d_flags |= DCACHE_DISCONNECTED;
301 if (parent) 305 if (parent)
302 spin_unlock(&parent->d_lock); 306 spin_unlock(&parent->d_lock);
303 dentry_iput(dentry); 307 dentry_iput(dentry);
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb)
1012} 1016}
1013 1017
1014/* 1018/*
1019 * This tries to ascend one level of parenthood, but
1020 * we can race with renaming, so we need to re-check
1021 * the parenthood after dropping the lock and check
1022 * that the sequence number still matches.
1023 */
1024static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
1025{
1026 struct dentry *new = old->d_parent;
1027
1028 rcu_read_lock();
1029 spin_unlock(&old->d_lock);
1030 spin_lock(&new->d_lock);
1031
1032 /*
1033 * might go back up the wrong parent if we have had a rename
1034 * or deletion
1035 */
1036 if (new != old->d_parent ||
1037 (old->d_flags & DCACHE_DISCONNECTED) ||
1038 (!locked && read_seqretry(&rename_lock, seq))) {
1039 spin_unlock(&new->d_lock);
1040 new = NULL;
1041 }
1042 rcu_read_unlock();
1043 return new;
1044}
1045
1046
1047/*
1015 * Search for at least 1 mount point in the dentry's subdirs. 1048 * Search for at least 1 mount point in the dentry's subdirs.
1016 * We descend to the next level whenever the d_subdirs 1049 * We descend to the next level whenever the d_subdirs
1017 * list is non-empty and continue searching. 1050 * list is non-empty and continue searching.
@@ -1066,24 +1099,10 @@ resume:
1066 * All done at this level ... ascend and resume the search. 1099 * All done at this level ... ascend and resume the search.
1067 */ 1100 */
1068 if (this_parent != parent) { 1101 if (this_parent != parent) {
1069 struct dentry *tmp; 1102 struct dentry *child = this_parent;
1070 struct dentry *child; 1103 this_parent = try_to_ascend(this_parent, locked, seq);
1071 1104 if (!this_parent)
1072 tmp = this_parent->d_parent;
1073 rcu_read_lock();
1074 spin_unlock(&this_parent->d_lock);
1075 child = this_parent;
1076 this_parent = tmp;
1077 spin_lock(&this_parent->d_lock);
1078 /* might go back up the wrong parent if we have had a rename
1079 * or deletion */
1080 if (this_parent != child->d_parent ||
1081 (!locked && read_seqretry(&rename_lock, seq))) {
1082 spin_unlock(&this_parent->d_lock);
1083 rcu_read_unlock();
1084 goto rename_retry; 1105 goto rename_retry;
1085 }
1086 rcu_read_unlock();
1087 next = child->d_u.d_child.next; 1106 next = child->d_u.d_child.next;
1088 goto resume; 1107 goto resume;
1089 } 1108 }
@@ -1181,24 +1200,10 @@ resume:
1181 * All done at this level ... ascend and resume the search. 1200 * All done at this level ... ascend and resume the search.
1182 */ 1201 */
1183 if (this_parent != parent) { 1202 if (this_parent != parent) {
1184 struct dentry *tmp; 1203 struct dentry *child = this_parent;
1185 struct dentry *child; 1204 this_parent = try_to_ascend(this_parent, locked, seq);
1186 1205 if (!this_parent)
1187 tmp = this_parent->d_parent;
1188 rcu_read_lock();
1189 spin_unlock(&this_parent->d_lock);
1190 child = this_parent;
1191 this_parent = tmp;
1192 spin_lock(&this_parent->d_lock);
1193 /* might go back up the wrong parent if we have had a rename
1194 * or deletion */
1195 if (this_parent != child->d_parent ||
1196 (!locked && read_seqretry(&rename_lock, seq))) {
1197 spin_unlock(&this_parent->d_lock);
1198 rcu_read_unlock();
1199 goto rename_retry; 1206 goto rename_retry;
1200 }
1201 rcu_read_unlock();
1202 next = child->d_u.d_child.next; 1207 next = child->d_u.d_child.next;
1203 goto resume; 1208 goto resume;
1204 } 1209 }
@@ -1523,6 +1528,28 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1523} 1528}
1524EXPORT_SYMBOL(d_alloc_root); 1529EXPORT_SYMBOL(d_alloc_root);
1525 1530
1531static struct dentry * __d_find_any_alias(struct inode *inode)
1532{
1533 struct dentry *alias;
1534
1535 if (list_empty(&inode->i_dentry))
1536 return NULL;
1537 alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
1538 __dget(alias);
1539 return alias;
1540}
1541
1542static struct dentry * d_find_any_alias(struct inode *inode)
1543{
1544 struct dentry *de;
1545
1546 spin_lock(&inode->i_lock);
1547 de = __d_find_any_alias(inode);
1548 spin_unlock(&inode->i_lock);
1549 return de;
1550}
1551
1552
1526/** 1553/**
1527 * d_obtain_alias - find or allocate a dentry for a given inode 1554 * d_obtain_alias - find or allocate a dentry for a given inode
1528 * @inode: inode to allocate the dentry for 1555 * @inode: inode to allocate the dentry for
@@ -1552,7 +1579,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1552 if (IS_ERR(inode)) 1579 if (IS_ERR(inode))
1553 return ERR_CAST(inode); 1580 return ERR_CAST(inode);
1554 1581
1555 res = d_find_alias(inode); 1582 res = d_find_any_alias(inode);
1556 if (res) 1583 if (res)
1557 goto out_iput; 1584 goto out_iput;
1558 1585
@@ -1565,7 +1592,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
1565 1592
1566 1593
1567 spin_lock(&inode->i_lock); 1594 spin_lock(&inode->i_lock);
1568 res = __d_find_alias(inode, 0); 1595 res = __d_find_any_alias(inode);
1569 if (res) { 1596 if (res) {
1570 spin_unlock(&inode->i_lock); 1597 spin_unlock(&inode->i_lock);
1571 dput(tmp); 1598 dput(tmp);
@@ -1585,10 +1612,13 @@ struct dentry *d_obtain_alias(struct inode *inode)
1585 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); 1612 __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first);
1586 spin_unlock(&tmp->d_lock); 1613 spin_unlock(&tmp->d_lock);
1587 spin_unlock(&inode->i_lock); 1614 spin_unlock(&inode->i_lock);
1615 security_d_instantiate(tmp, inode);
1588 1616
1589 return tmp; 1617 return tmp;
1590 1618
1591 out_iput: 1619 out_iput:
1620 if (res && !IS_ERR(res))
1621 security_d_instantiate(res, inode);
1592 iput(inode); 1622 iput(inode);
1593 return res; 1623 return res;
1594} 1624}
@@ -1781,7 +1811,7 @@ struct dentry *__d_lookup_rcu(struct dentry *parent, struct qstr *name,
1781 * false-negative result. d_lookup() protects against concurrent 1811 * false-negative result. d_lookup() protects against concurrent
1782 * renames using rename_lock seqlock. 1812 * renames using rename_lock seqlock.
1783 * 1813 *
1784 * See Documentation/vfs/dcache-locking.txt for more details. 1814 * See Documentation/filesystems/path-lookup.txt for more details.
1785 */ 1815 */
1786 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) { 1816 hlist_bl_for_each_entry_rcu(dentry, node, &b->head, d_hash) {
1787 struct inode *i; 1817 struct inode *i;
@@ -1901,7 +1931,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
1901 * false-negative result. d_lookup() protects against concurrent 1931 * false-negative result. d_lookup() protects against concurrent
1902 * renames using rename_lock seqlock. 1932 * renames using rename_lock seqlock.
1903 * 1933 *
1904 * See Documentation/vfs/dcache-locking.txt for more details. 1934 * See Documentation/filesystems/path-lookup.txt for more details.
1905 */ 1935 */
1906 rcu_read_lock(); 1936 rcu_read_lock();
1907 1937
@@ -2920,28 +2950,14 @@ resume:
2920 spin_unlock(&dentry->d_lock); 2950 spin_unlock(&dentry->d_lock);
2921 } 2951 }
2922 if (this_parent != root) { 2952 if (this_parent != root) {
2923 struct dentry *tmp; 2953 struct dentry *child = this_parent;
2924 struct dentry *child;
2925
2926 tmp = this_parent->d_parent;
2927 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { 2954 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
2928 this_parent->d_flags |= DCACHE_GENOCIDE; 2955 this_parent->d_flags |= DCACHE_GENOCIDE;
2929 this_parent->d_count--; 2956 this_parent->d_count--;
2930 } 2957 }
2931 rcu_read_lock(); 2958 this_parent = try_to_ascend(this_parent, locked, seq);
2932 spin_unlock(&this_parent->d_lock); 2959 if (!this_parent)
2933 child = this_parent;
2934 this_parent = tmp;
2935 spin_lock(&this_parent->d_lock);
2936 /* might go back up the wrong parent if we have had a rename
2937 * or deletion */
2938 if (this_parent != child->d_parent ||
2939 (!locked && read_seqretry(&rename_lock, seq))) {
2940 spin_unlock(&this_parent->d_lock);
2941 rcu_read_unlock();
2942 goto rename_retry; 2960 goto rename_retry;
2943 }
2944 rcu_read_unlock();
2945 next = child->d_u.d_child.next; 2961 next = child->d_u.d_child.next;
2946 goto resume; 2962 goto resume;
2947 } 2963 }
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 37a8ca7c1222..e7a7a2f07324 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -13,9 +13,6 @@
13 * 13 *
14 */ 14 */
15 15
16/* uncomment to get debug messages from the debug filesystem, ah the irony. */
17/* #define DEBUG */
18
19#include <linux/module.h> 16#include <linux/module.h>
20#include <linux/fs.h> 17#include <linux/fs.h>
21#include <linux/mount.h> 18#include <linux/mount.h>
@@ -310,7 +307,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
310} 307}
311EXPORT_SYMBOL_GPL(debugfs_create_symlink); 308EXPORT_SYMBOL_GPL(debugfs_create_symlink);
312 309
313static void __debugfs_remove(struct dentry *dentry, struct dentry *parent) 310static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
314{ 311{
315 int ret = 0; 312 int ret = 0;
316 313
@@ -333,6 +330,7 @@ static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
333 dput(dentry); 330 dput(dentry);
334 } 331 }
335 } 332 }
333 return ret;
336} 334}
337 335
338/** 336/**
@@ -351,7 +349,8 @@ static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
351void debugfs_remove(struct dentry *dentry) 349void debugfs_remove(struct dentry *dentry)
352{ 350{
353 struct dentry *parent; 351 struct dentry *parent;
354 352 int ret;
353
355 if (!dentry) 354 if (!dentry)
356 return; 355 return;
357 356
@@ -360,9 +359,10 @@ void debugfs_remove(struct dentry *dentry)
360 return; 359 return;
361 360
362 mutex_lock(&parent->d_inode->i_mutex); 361 mutex_lock(&parent->d_inode->i_mutex);
363 __debugfs_remove(dentry, parent); 362 ret = __debugfs_remove(dentry, parent);
364 mutex_unlock(&parent->d_inode->i_mutex); 363 mutex_unlock(&parent->d_inode->i_mutex);
365 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 364 if (!ret)
365 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
366} 366}
367EXPORT_SYMBOL_GPL(debugfs_remove); 367EXPORT_SYMBOL_GPL(debugfs_remove);
368 368
@@ -540,17 +540,5 @@ static int __init debugfs_init(void)
540 540
541 return retval; 541 return retval;
542} 542}
543
544static void __exit debugfs_exit(void)
545{
546 debugfs_registered = false;
547
548 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
549 unregister_filesystem(&debug_fs_type);
550 kobject_put(debug_kobj);
551}
552
553core_initcall(debugfs_init); 543core_initcall(debugfs_init);
554module_exit(debugfs_exit);
555MODULE_LICENSE("GPL");
556 544
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 1bb547c9cad6..2f27e578d466 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -479,6 +479,7 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
479 struct dentry *root = sb->s_root; 479 struct dentry *root = sb->s_root;
480 struct pts_fs_info *fsi = DEVPTS_SB(sb); 480 struct pts_fs_info *fsi = DEVPTS_SB(sb);
481 struct pts_mount_opts *opts = &fsi->mount_opts; 481 struct pts_mount_opts *opts = &fsi->mount_opts;
482 int ret = 0;
482 char s[12]; 483 char s[12];
483 484
484 /* We're supposed to be given the slave end of a pty */ 485 /* We're supposed to be given the slave end of a pty */
@@ -501,14 +502,17 @@ int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty)
501 mutex_lock(&root->d_inode->i_mutex); 502 mutex_lock(&root->d_inode->i_mutex);
502 503
503 dentry = d_alloc_name(root, s); 504 dentry = d_alloc_name(root, s);
504 if (!IS_ERR(dentry)) { 505 if (dentry) {
505 d_add(dentry, inode); 506 d_add(dentry, inode);
506 fsnotify_create(root->d_inode, dentry); 507 fsnotify_create(root->d_inode, dentry);
508 } else {
509 iput(inode);
510 ret = -ENOMEM;
507 } 511 }
508 512
509 mutex_unlock(&root->d_inode->i_mutex); 513 mutex_unlock(&root->d_inode->i_mutex);
510 514
511 return 0; 515 return ret;
512} 516}
513 517
514struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) 518struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number)
@@ -544,17 +548,12 @@ void devpts_pty_kill(struct tty_struct *tty)
544 mutex_lock(&root->d_inode->i_mutex); 548 mutex_lock(&root->d_inode->i_mutex);
545 549
546 dentry = d_find_alias(inode); 550 dentry = d_find_alias(inode);
547 if (IS_ERR(dentry))
548 goto out;
549
550 if (dentry) {
551 inode->i_nlink--;
552 d_delete(dentry);
553 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
554 }
555 551
552 inode->i_nlink--;
553 d_delete(dentry);
554 dput(dentry); /* d_alloc_name() in devpts_pty_new() */
556 dput(dentry); /* d_find_alias above */ 555 dput(dentry); /* d_find_alias above */
557out: 556
558 mutex_unlock(&root->d_inode->i_mutex); 557 mutex_unlock(&root->d_inode->i_mutex);
559} 558}
560 559
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b044705eedd4..dcb5577cde1d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -645,11 +645,11 @@ static int dio_send_cur_page(struct dio *dio)
645 /* 645 /*
646 * See whether this new request is contiguous with the old. 646 * See whether this new request is contiguous with the old.
647 * 647 *
648 * Btrfs cannot handl having logically non-contiguous requests 648 * Btrfs cannot handle having logically non-contiguous requests
649 * submitted. For exmple if you have 649 * submitted. For example if you have
650 * 650 *
651 * Logical: [0-4095][HOLE][8192-12287] 651 * Logical: [0-4095][HOLE][8192-12287]
652 * Phyiscal: [0-4095] [4096-8181] 652 * Physical: [0-4095] [4096-8191]
653 * 653 *
654 * We cannot submit those pages together as one BIO. So if our 654 * We cannot submit those pages together as one BIO. So if our
655 * current logical offset in the file does not equal what would 655 * current logical offset in the file does not equal what would
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 4314f0d48d85..abc49f292454 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -18,6 +18,7 @@
18 18
19#define WAKE_ASTS 0 19#define WAKE_ASTS 0
20 20
21static uint64_t ast_seq_count;
21static struct list_head ast_queue; 22static struct list_head ast_queue;
22static spinlock_t ast_queue_lock; 23static spinlock_t ast_queue_lock;
23static struct task_struct * astd_task; 24static struct task_struct * astd_task;
@@ -25,40 +26,186 @@ static unsigned long astd_wakeflags;
25static struct mutex astd_running; 26static struct mutex astd_running;
26 27
27 28
29static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
30{
31 int i;
32
33 log_print("last_bast %x %llu flags %x mode %d sb %d %x",
34 lkb->lkb_id,
35 (unsigned long long)lkb->lkb_last_bast.seq,
36 lkb->lkb_last_bast.flags,
37 lkb->lkb_last_bast.mode,
38 lkb->lkb_last_bast.sb_status,
39 lkb->lkb_last_bast.sb_flags);
40
41 log_print("last_cast %x %llu flags %x mode %d sb %d %x",
42 lkb->lkb_id,
43 (unsigned long long)lkb->lkb_last_cast.seq,
44 lkb->lkb_last_cast.flags,
45 lkb->lkb_last_cast.mode,
46 lkb->lkb_last_cast.sb_status,
47 lkb->lkb_last_cast.sb_flags);
48
49 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
50 log_print("cb %x %llu flags %x mode %d sb %d %x",
51 lkb->lkb_id,
52 (unsigned long long)lkb->lkb_callbacks[i].seq,
53 lkb->lkb_callbacks[i].flags,
54 lkb->lkb_callbacks[i].mode,
55 lkb->lkb_callbacks[i].sb_status,
56 lkb->lkb_callbacks[i].sb_flags);
57 }
58}
59
28void dlm_del_ast(struct dlm_lkb *lkb) 60void dlm_del_ast(struct dlm_lkb *lkb)
29{ 61{
30 spin_lock(&ast_queue_lock); 62 spin_lock(&ast_queue_lock);
31 if (lkb->lkb_ast_type & (AST_COMP | AST_BAST)) 63 if (!list_empty(&lkb->lkb_astqueue))
32 list_del(&lkb->lkb_astqueue); 64 list_del_init(&lkb->lkb_astqueue);
33 spin_unlock(&ast_queue_lock); 65 spin_unlock(&ast_queue_lock);
34} 66}
35 67
36void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode) 68int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
69 int status, uint32_t sbflags, uint64_t seq)
37{ 70{
71 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
72 uint64_t prev_seq;
73 int prev_mode;
74 int i;
75
76 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
77 if (lkb->lkb_callbacks[i].seq)
78 continue;
79
80 /*
81 * Suppress some redundant basts here, do more on removal.
82 * Don't even add a bast if the callback just before it
83 * is a bast for the same mode or a more restrictive mode.
84 * (the addional > PR check is needed for PR/CW inversion)
85 */
86
87 if ((i > 0) && (flags & DLM_CB_BAST) &&
88 (lkb->lkb_callbacks[i-1].flags & DLM_CB_BAST)) {
89
90 prev_seq = lkb->lkb_callbacks[i-1].seq;
91 prev_mode = lkb->lkb_callbacks[i-1].mode;
92
93 if ((prev_mode == mode) ||
94 (prev_mode > mode && prev_mode > DLM_LOCK_PR)) {
95
96 log_debug(ls, "skip %x add bast %llu mode %d "
97 "for bast %llu mode %d",
98 lkb->lkb_id,
99 (unsigned long long)seq,
100 mode,
101 (unsigned long long)prev_seq,
102 prev_mode);
103 return 0;
104 }
105 }
106
107 lkb->lkb_callbacks[i].seq = seq;
108 lkb->lkb_callbacks[i].flags = flags;
109 lkb->lkb_callbacks[i].mode = mode;
110 lkb->lkb_callbacks[i].sb_status = status;
111 lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
112 break;
113 }
114
115 if (i == DLM_CALLBACKS_SIZE) {
116 log_error(ls, "no callbacks %x %llu flags %x mode %d sb %d %x",
117 lkb->lkb_id, (unsigned long long)seq,
118 flags, mode, status, sbflags);
119 dlm_dump_lkb_callbacks(lkb);
120 return -1;
121 }
122
123 return 0;
124}
125
126int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
127 struct dlm_callback *cb, int *resid)
128{
129 int i;
130
131 *resid = 0;
132
133 if (!lkb->lkb_callbacks[0].seq)
134 return -ENOENT;
135
136 /* oldest undelivered cb is callbacks[0] */
137
138 memcpy(cb, &lkb->lkb_callbacks[0], sizeof(struct dlm_callback));
139 memset(&lkb->lkb_callbacks[0], 0, sizeof(struct dlm_callback));
140
141 /* shift others down */
142
143 for (i = 1; i < DLM_CALLBACKS_SIZE; i++) {
144 if (!lkb->lkb_callbacks[i].seq)
145 break;
146 memcpy(&lkb->lkb_callbacks[i-1], &lkb->lkb_callbacks[i],
147 sizeof(struct dlm_callback));
148 memset(&lkb->lkb_callbacks[i], 0, sizeof(struct dlm_callback));
149 (*resid)++;
150 }
151
152 /* if cb is a bast, it should be skipped if the blocking mode is
153 compatible with the last granted mode */
154
155 if ((cb->flags & DLM_CB_BAST) && lkb->lkb_last_cast.seq) {
156 if (dlm_modes_compat(cb->mode, lkb->lkb_last_cast.mode)) {
157 cb->flags |= DLM_CB_SKIP;
158
159 log_debug(ls, "skip %x bast %llu mode %d "
160 "for cast %llu mode %d",
161 lkb->lkb_id,
162 (unsigned long long)cb->seq,
163 cb->mode,
164 (unsigned long long)lkb->lkb_last_cast.seq,
165 lkb->lkb_last_cast.mode);
166 return 0;
167 }
168 }
169
170 if (cb->flags & DLM_CB_CAST) {
171 memcpy(&lkb->lkb_last_cast, cb, sizeof(struct dlm_callback));
172 lkb->lkb_last_cast_time = ktime_get();
173 }
174
175 if (cb->flags & DLM_CB_BAST) {
176 memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
177 lkb->lkb_last_bast_time = ktime_get();
178 }
179
180 return 0;
181}
182
183void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
184 uint32_t sbflags)
185{
186 uint64_t seq;
187 int rv;
188
189 spin_lock(&ast_queue_lock);
190
191 seq = ++ast_seq_count;
192
38 if (lkb->lkb_flags & DLM_IFL_USER) { 193 if (lkb->lkb_flags & DLM_IFL_USER) {
39 dlm_user_add_ast(lkb, type, mode); 194 spin_unlock(&ast_queue_lock);
195 dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq);
40 return; 196 return;
41 } 197 }
42 198
43 spin_lock(&ast_queue_lock); 199 rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq);
44 if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) { 200 if (rv < 0) {
201 spin_unlock(&ast_queue_lock);
202 return;
203 }
204
205 if (list_empty(&lkb->lkb_astqueue)) {
45 kref_get(&lkb->lkb_ref); 206 kref_get(&lkb->lkb_ref);
46 list_add_tail(&lkb->lkb_astqueue, &ast_queue); 207 list_add_tail(&lkb->lkb_astqueue, &ast_queue);
47 lkb->lkb_ast_first = type;
48 } 208 }
49
50 /* sanity check, this should not happen */
51
52 if ((type == AST_COMP) && (lkb->lkb_ast_type & AST_COMP))
53 log_print("repeat cast %d castmode %d lock %x %s",
54 mode, lkb->lkb_castmode,
55 lkb->lkb_id, lkb->lkb_resource->res_name);
56
57 lkb->lkb_ast_type |= type;
58 if (type == AST_BAST)
59 lkb->lkb_bastmode = mode;
60 else
61 lkb->lkb_castmode = mode;
62 spin_unlock(&ast_queue_lock); 209 spin_unlock(&ast_queue_lock);
63 210
64 set_bit(WAKE_ASTS, &astd_wakeflags); 211 set_bit(WAKE_ASTS, &astd_wakeflags);
@@ -72,7 +219,8 @@ static void process_asts(void)
72 struct dlm_lkb *lkb; 219 struct dlm_lkb *lkb;
73 void (*castfn) (void *astparam); 220 void (*castfn) (void *astparam);
74 void (*bastfn) (void *astparam, int mode); 221 void (*bastfn) (void *astparam, int mode);
75 int type, first, bastmode, castmode, do_bast, do_cast, last_castmode; 222 struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
223 int i, rv, resid;
76 224
77repeat: 225repeat:
78 spin_lock(&ast_queue_lock); 226 spin_lock(&ast_queue_lock);
@@ -83,54 +231,45 @@ repeat:
83 if (dlm_locking_stopped(ls)) 231 if (dlm_locking_stopped(ls))
84 continue; 232 continue;
85 233
86 list_del(&lkb->lkb_astqueue); 234 /* we remove from astqueue list and remove everything in
87 type = lkb->lkb_ast_type; 235 lkb_callbacks before releasing the spinlock so empty
88 lkb->lkb_ast_type = 0; 236 lkb_astqueue is always consistent with empty lkb_callbacks */
89 first = lkb->lkb_ast_first; 237
90 lkb->lkb_ast_first = 0; 238 list_del_init(&lkb->lkb_astqueue);
91 bastmode = lkb->lkb_bastmode; 239
92 castmode = lkb->lkb_castmode;
93 castfn = lkb->lkb_astfn; 240 castfn = lkb->lkb_astfn;
94 bastfn = lkb->lkb_bastfn; 241 bastfn = lkb->lkb_bastfn;
95 spin_unlock(&ast_queue_lock);
96 242
97 do_cast = (type & AST_COMP) && castfn; 243 memset(&callbacks, 0, sizeof(callbacks));
98 do_bast = (type & AST_BAST) && bastfn;
99 244
100 /* Skip a bast if its blocking mode is compatible with the 245 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
101 granted mode of the preceding cast. */ 246 rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
247 if (rv < 0)
248 break;
249 }
250 spin_unlock(&ast_queue_lock);
102 251
103 if (do_bast) { 252 if (resid) {
104 if (first == AST_COMP) 253 /* shouldn't happen, for loop should have removed all */
105 last_castmode = castmode; 254 log_error(ls, "callback resid %d lkb %x",
106 else 255 resid, lkb->lkb_id);
107 last_castmode = lkb->lkb_castmode_done;
108 if (dlm_modes_compat(bastmode, last_castmode))
109 do_bast = 0;
110 } 256 }
111 257
112 if (first == AST_COMP) { 258 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
113 if (do_cast) 259 if (!callbacks[i].seq)
114 castfn(lkb->lkb_astparam); 260 break;
115 if (do_bast) 261 if (callbacks[i].flags & DLM_CB_SKIP) {
116 bastfn(lkb->lkb_astparam, bastmode); 262 continue;
117 } else if (first == AST_BAST) { 263 } else if (callbacks[i].flags & DLM_CB_BAST) {
118 if (do_bast) 264 bastfn(lkb->lkb_astparam, callbacks[i].mode);
119 bastfn(lkb->lkb_astparam, bastmode); 265 } else if (callbacks[i].flags & DLM_CB_CAST) {
120 if (do_cast) 266 lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
267 lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
121 castfn(lkb->lkb_astparam); 268 castfn(lkb->lkb_astparam);
122 } else { 269 }
123 log_error(ls, "bad ast_first %d ast_type %d",
124 first, type);
125 } 270 }
126 271
127 if (do_cast) 272 /* removes ref for ast_queue, may cause lkb to be freed */
128 lkb->lkb_castmode_done = castmode;
129 if (do_bast)
130 lkb->lkb_bastmode_done = bastmode;
131
132 /* this removes the reference added by dlm_add_ast
133 and may result in the lkb being freed */
134 dlm_put_lkb(lkb); 273 dlm_put_lkb(lkb);
135 274
136 cond_resched(); 275 cond_resched();
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index bcb1aaba519d..8aa89c9b5611 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -13,8 +13,13 @@
13#ifndef __ASTD_DOT_H__ 13#ifndef __ASTD_DOT_H__
14#define __ASTD_DOT_H__ 14#define __ASTD_DOT_H__
15 15
16void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode);
17void dlm_del_ast(struct dlm_lkb *lkb); 16void dlm_del_ast(struct dlm_lkb *lkb);
17int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
18 int status, uint32_t sbflags, uint64_t seq);
19int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
20 struct dlm_callback *cb, int *resid);
21void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
22 uint32_t sbflags);
18 23
19void dlm_astd_wake(void); 24void dlm_astd_wake(void);
20int dlm_astd_start(void); 25int dlm_astd_start(void);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index b54bca03d92f..0d329ff8ed4c 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -977,9 +977,9 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
977/* Config file defaults */ 977/* Config file defaults */
978#define DEFAULT_TCP_PORT 21064 978#define DEFAULT_TCP_PORT 21064
979#define DEFAULT_BUFFER_SIZE 4096 979#define DEFAULT_BUFFER_SIZE 4096
980#define DEFAULT_RSBTBL_SIZE 256 980#define DEFAULT_RSBTBL_SIZE 1024
981#define DEFAULT_LKBTBL_SIZE 1024 981#define DEFAULT_LKBTBL_SIZE 1024
982#define DEFAULT_DIRTBL_SIZE 512 982#define DEFAULT_DIRTBL_SIZE 1024
983#define DEFAULT_RECOVER_TIMER 5 983#define DEFAULT_RECOVER_TIMER 5
984#define DEFAULT_TOSS_SECS 10 984#define DEFAULT_TOSS_SECS 10
985#define DEFAULT_SCAN_SECS 5 985#define DEFAULT_SCAN_SECS 5
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 6b42ba807dfd..59779237e2b4 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -257,12 +257,12 @@ static int print_format3_lock(struct seq_file *s, struct dlm_lkb *lkb,
257 lkb->lkb_status, 257 lkb->lkb_status,
258 lkb->lkb_grmode, 258 lkb->lkb_grmode,
259 lkb->lkb_rqmode, 259 lkb->lkb_rqmode,
260 lkb->lkb_bastmode, 260 lkb->lkb_last_bast.mode,
261 rsb_lookup, 261 rsb_lookup,
262 lkb->lkb_wait_type, 262 lkb->lkb_wait_type,
263 lkb->lkb_lvbseq, 263 lkb->lkb_lvbseq,
264 (unsigned long long)ktime_to_ns(lkb->lkb_timestamp), 264 (unsigned long long)ktime_to_ns(lkb->lkb_timestamp),
265 (unsigned long long)ktime_to_ns(lkb->lkb_time_bast)); 265 (unsigned long long)ktime_to_ns(lkb->lkb_last_bast_time));
266 return rv; 266 return rv;
267} 267}
268 268
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index f632b58cd222..b94204913011 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -192,11 +192,6 @@ struct dlm_args {
192 * lkb is a process copy, the nodeid specifies the lock master. 192 * lkb is a process copy, the nodeid specifies the lock master.
193 */ 193 */
194 194
195/* lkb_ast_type */
196
197#define AST_COMP 1
198#define AST_BAST 2
199
200/* lkb_status */ 195/* lkb_status */
201 196
202#define DLM_LKSTS_WAITING 1 197#define DLM_LKSTS_WAITING 1
@@ -217,6 +212,20 @@ struct dlm_args {
217#define DLM_IFL_USER 0x00000001 212#define DLM_IFL_USER 0x00000001
218#define DLM_IFL_ORPHAN 0x00000002 213#define DLM_IFL_ORPHAN 0x00000002
219 214
215#define DLM_CALLBACKS_SIZE 6
216
217#define DLM_CB_CAST 0x00000001
218#define DLM_CB_BAST 0x00000002
219#define DLM_CB_SKIP 0x00000004
220
221struct dlm_callback {
222 uint64_t seq;
223 uint32_t flags; /* DLM_CBF_ */
224 int sb_status; /* copy to lksb status */
225 uint8_t sb_flags; /* copy to lksb flags */
226 int8_t mode; /* rq mode of bast, gr mode of cast */
227};
228
220struct dlm_lkb { 229struct dlm_lkb {
221 struct dlm_rsb *lkb_resource; /* the rsb */ 230 struct dlm_rsb *lkb_resource; /* the rsb */
222 struct kref lkb_ref; 231 struct kref lkb_ref;
@@ -236,13 +245,6 @@ struct dlm_lkb {
236 245
237 int8_t lkb_wait_type; /* type of reply waiting for */ 246 int8_t lkb_wait_type; /* type of reply waiting for */
238 int8_t lkb_wait_count; 247 int8_t lkb_wait_count;
239 int8_t lkb_ast_type; /* type of ast queued for */
240 int8_t lkb_ast_first; /* type of first ast queued */
241
242 int8_t lkb_bastmode; /* req mode of queued bast */
243 int8_t lkb_castmode; /* gr mode of queued cast */
244 int8_t lkb_bastmode_done; /* last delivered bastmode */
245 int8_t lkb_castmode_done; /* last delivered castmode */
246 248
247 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ 249 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
248 struct list_head lkb_statequeue; /* rsb g/c/w list */ 250 struct list_head lkb_statequeue; /* rsb g/c/w list */
@@ -251,10 +253,15 @@ struct dlm_lkb {
251 struct list_head lkb_astqueue; /* need ast to be sent */ 253 struct list_head lkb_astqueue; /* need ast to be sent */
252 struct list_head lkb_ownqueue; /* list of locks for a process */ 254 struct list_head lkb_ownqueue; /* list of locks for a process */
253 struct list_head lkb_time_list; 255 struct list_head lkb_time_list;
254 ktime_t lkb_time_bast; /* for debugging */
255 ktime_t lkb_timestamp; 256 ktime_t lkb_timestamp;
256 unsigned long lkb_timeout_cs; 257 unsigned long lkb_timeout_cs;
257 258
259 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
260 struct dlm_callback lkb_last_cast;
261 struct dlm_callback lkb_last_bast;
262 ktime_t lkb_last_cast_time; /* for debugging */
263 ktime_t lkb_last_bast_time; /* for debugging */
264
258 char *lkb_lvbptr; 265 char *lkb_lvbptr;
259 struct dlm_lksb *lkb_lksb; /* caller's status block */ 266 struct dlm_lksb *lkb_lksb; /* caller's status block */
260 void (*lkb_astfn) (void *astparam); 267 void (*lkb_astfn) (void *astparam);
@@ -544,8 +551,6 @@ struct dlm_user_args {
544 (dlm_user_proc) on the struct file, 551 (dlm_user_proc) on the struct file,
545 the process's locks point back to it*/ 552 the process's locks point back to it*/
546 struct dlm_lksb lksb; 553 struct dlm_lksb lksb;
547 int old_mode;
548 int update_user_lvb;
549 struct dlm_lksb __user *user_lksb; 554 struct dlm_lksb __user *user_lksb;
550 void __user *castparam; 555 void __user *castparam;
551 void __user *castaddr; 556 void __user *castaddr;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 64e5f3efdd81..04b8c449303f 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -160,10 +160,10 @@ static const int __quecvt_compat_matrix[8][8] = {
160void dlm_print_lkb(struct dlm_lkb *lkb) 160void dlm_print_lkb(struct dlm_lkb *lkb)
161{ 161{
162 printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n" 162 printk(KERN_ERR "lkb: nodeid %d id %x remid %x exflags %x flags %x\n"
163 " status %d rqmode %d grmode %d wait_type %d ast_type %d\n", 163 " status %d rqmode %d grmode %d wait_type %d\n",
164 lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags, 164 lkb->lkb_nodeid, lkb->lkb_id, lkb->lkb_remid, lkb->lkb_exflags,
165 lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode, 165 lkb->lkb_flags, lkb->lkb_status, lkb->lkb_rqmode,
166 lkb->lkb_grmode, lkb->lkb_wait_type, lkb->lkb_ast_type); 166 lkb->lkb_grmode, lkb->lkb_wait_type);
167} 167}
168 168
169static void dlm_print_rsb(struct dlm_rsb *r) 169static void dlm_print_rsb(struct dlm_rsb *r)
@@ -305,10 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
305 rv = -EDEADLK; 305 rv = -EDEADLK;
306 } 306 }
307 307
308 lkb->lkb_lksb->sb_status = rv; 308 dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
309 lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
310
311 dlm_add_ast(lkb, AST_COMP, lkb->lkb_grmode);
312} 309}
313 310
314static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) 311static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -319,13 +316,10 @@ static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
319 316
320static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode) 317static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
321{ 318{
322 lkb->lkb_time_bast = ktime_get();
323
324 if (is_master_copy(lkb)) { 319 if (is_master_copy(lkb)) {
325 lkb->lkb_bastmode = rqmode; /* printed by debugfs */
326 send_bast(r, lkb, rqmode); 320 send_bast(r, lkb, rqmode);
327 } else { 321 } else {
328 dlm_add_ast(lkb, AST_BAST, rqmode); 322 dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0);
329 } 323 }
330} 324}
331 325
@@ -600,6 +594,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
600 INIT_LIST_HEAD(&lkb->lkb_ownqueue); 594 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
601 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); 595 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
602 INIT_LIST_HEAD(&lkb->lkb_time_list); 596 INIT_LIST_HEAD(&lkb->lkb_time_list);
597 INIT_LIST_HEAD(&lkb->lkb_astqueue);
603 598
604 get_random_bytes(&bucket, sizeof(bucket)); 599 get_random_bytes(&bucket, sizeof(bucket));
605 bucket &= (ls->ls_lkbtbl_size - 1); 600 bucket &= (ls->ls_lkbtbl_size - 1);
@@ -2819,9 +2814,9 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
2819 not from lkb fields */ 2814 not from lkb fields */
2820 2815
2821 if (lkb->lkb_bastfn) 2816 if (lkb->lkb_bastfn)
2822 ms->m_asts |= AST_BAST; 2817 ms->m_asts |= DLM_CB_BAST;
2823 if (lkb->lkb_astfn) 2818 if (lkb->lkb_astfn)
2824 ms->m_asts |= AST_COMP; 2819 ms->m_asts |= DLM_CB_CAST;
2825 2820
2826 /* compare with switch in create_message; send_remove() doesn't 2821 /* compare with switch in create_message; send_remove() doesn't
2827 use send_args() */ 2822 use send_args() */
@@ -3122,8 +3117,8 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
3122 lkb->lkb_grmode = DLM_LOCK_IV; 3117 lkb->lkb_grmode = DLM_LOCK_IV;
3123 lkb->lkb_rqmode = ms->m_rqmode; 3118 lkb->lkb_rqmode = ms->m_rqmode;
3124 3119
3125 lkb->lkb_bastfn = (ms->m_asts & AST_BAST) ? &fake_bastfn : NULL; 3120 lkb->lkb_bastfn = (ms->m_asts & DLM_CB_BAST) ? &fake_bastfn : NULL;
3126 lkb->lkb_astfn = (ms->m_asts & AST_COMP) ? &fake_astfn : NULL; 3121 lkb->lkb_astfn = (ms->m_asts & DLM_CB_CAST) ? &fake_astfn : NULL;
3127 3122
3128 if (lkb->lkb_exflags & DLM_LKF_VALBLK) { 3123 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
3129 /* lkb was just created so there won't be an lvb yet */ 3124 /* lkb was just created so there won't be an lvb yet */
@@ -4412,8 +4407,8 @@ static int receive_rcom_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
4412 lkb->lkb_grmode = rl->rl_grmode; 4407 lkb->lkb_grmode = rl->rl_grmode;
4413 /* don't set lkb_status because add_lkb wants to itself */ 4408 /* don't set lkb_status because add_lkb wants to itself */
4414 4409
4415 lkb->lkb_bastfn = (rl->rl_asts & AST_BAST) ? &fake_bastfn : NULL; 4410 lkb->lkb_bastfn = (rl->rl_asts & DLM_CB_BAST) ? &fake_bastfn : NULL;
4416 lkb->lkb_astfn = (rl->rl_asts & AST_COMP) ? &fake_astfn : NULL; 4411 lkb->lkb_astfn = (rl->rl_asts & DLM_CB_CAST) ? &fake_astfn : NULL;
4417 4412
4418 if (lkb->lkb_exflags & DLM_LKF_VALBLK) { 4413 if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
4419 int lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) - 4414 int lvblen = rc->rc_header.h_length - sizeof(struct dlm_rcom) -
@@ -4589,7 +4584,6 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
4589 error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs, 4584 error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
4590 fake_astfn, ua, fake_bastfn, &args); 4585 fake_astfn, ua, fake_bastfn, &args);
4591 lkb->lkb_flags |= DLM_IFL_USER; 4586 lkb->lkb_flags |= DLM_IFL_USER;
4592 ua->old_mode = DLM_LOCK_IV;
4593 4587
4594 if (error) { 4588 if (error) {
4595 __put_lkb(ls, lkb); 4589 __put_lkb(ls, lkb);
@@ -4658,7 +4652,6 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4658 ua->bastparam = ua_tmp->bastparam; 4652 ua->bastparam = ua_tmp->bastparam;
4659 ua->bastaddr = ua_tmp->bastaddr; 4653 ua->bastaddr = ua_tmp->bastaddr;
4660 ua->user_lksb = ua_tmp->user_lksb; 4654 ua->user_lksb = ua_tmp->user_lksb;
4661 ua->old_mode = lkb->lkb_grmode;
4662 4655
4663 error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs, 4656 error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
4664 fake_astfn, ua, fake_bastfn, &args); 4657 fake_astfn, ua, fake_bastfn, &args);
@@ -4917,8 +4910,9 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4917 } 4910 }
4918 4911
4919 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { 4912 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
4920 lkb->lkb_ast_type = 0; 4913 memset(&lkb->lkb_callbacks, 0,
4921 list_del(&lkb->lkb_astqueue); 4914 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
4915 list_del_init(&lkb->lkb_astqueue);
4922 dlm_put_lkb(lkb); 4916 dlm_put_lkb(lkb);
4923 } 4917 }
4924 4918
@@ -4958,7 +4952,9 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
4958 4952
4959 spin_lock(&proc->asts_spin); 4953 spin_lock(&proc->asts_spin);
4960 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { 4954 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
4961 list_del(&lkb->lkb_astqueue); 4955 memset(&lkb->lkb_callbacks, 0,
4956 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
4957 list_del_init(&lkb->lkb_astqueue);
4962 dlm_put_lkb(lkb); 4958 dlm_put_lkb(lkb);
4963 } 4959 }
4964 spin_unlock(&proc->asts_spin); 4960 spin_unlock(&proc->asts_spin);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 2d8c87b951c2..bffa1e73b9a9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,13 +1468,15 @@ static void work_stop(void)
1468 1468
1469static int work_start(void) 1469static int work_start(void)
1470{ 1470{
1471 recv_workqueue = create_singlethread_workqueue("dlm_recv"); 1471 recv_workqueue = alloc_workqueue("dlm_recv",
1472 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
1472 if (!recv_workqueue) { 1473 if (!recv_workqueue) {
1473 log_print("can't start dlm_recv"); 1474 log_print("can't start dlm_recv");
1474 return -ENOMEM; 1475 return -ENOMEM;
1475 } 1476 }
1476 1477
1477 send_workqueue = create_singlethread_workqueue("dlm_send"); 1478 send_workqueue = alloc_workqueue("dlm_send",
1479 WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
1478 if (!send_workqueue) { 1480 if (!send_workqueue) {
1479 log_print("can't start dlm_send"); 1481 log_print("can't start dlm_send");
1480 destroy_workqueue(recv_workqueue); 1482 destroy_workqueue(recv_workqueue);
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 3c83a49a48a3..f10a50f24e8f 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -321,9 +321,9 @@ static void pack_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb,
321 rl->rl_wait_type = cpu_to_le16(lkb->lkb_wait_type); 321 rl->rl_wait_type = cpu_to_le16(lkb->lkb_wait_type);
322 322
323 if (lkb->lkb_bastfn) 323 if (lkb->lkb_bastfn)
324 rl->rl_asts |= AST_BAST; 324 rl->rl_asts |= DLM_CB_BAST;
325 if (lkb->lkb_astfn) 325 if (lkb->lkb_astfn)
326 rl->rl_asts |= AST_COMP; 326 rl->rl_asts |= DLM_CB_CAST;
327 327
328 rl->rl_namelen = cpu_to_le16(r->res_length); 328 rl->rl_namelen = cpu_to_le16(r->res_length);
329 memcpy(rl->rl_name, r->res_name, r->res_length); 329 memcpy(rl->rl_name, r->res_name, r->res_length);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 66d6c16bf440..d5ab3fe7c198 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -24,6 +24,7 @@
24#include "lock.h" 24#include "lock.h"
25#include "lvb_table.h" 25#include "lvb_table.h"
26#include "user.h" 26#include "user.h"
27#include "ast.h"
27 28
28static const char name_prefix[] = "dlm"; 29static const char name_prefix[] = "dlm";
29static const struct file_operations device_fops; 30static const struct file_operations device_fops;
@@ -152,19 +153,16 @@ static void compat_output(struct dlm_lock_result *res,
152 not related to the lifetime of the lkb struct which is managed 153 not related to the lifetime of the lkb struct which is managed
153 entirely by refcount. */ 154 entirely by refcount. */
154 155
155static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type) 156static int lkb_is_endoflife(int mode, int status)
156{ 157{
157 switch (sb_status) { 158 switch (status) {
158 case -DLM_EUNLOCK: 159 case -DLM_EUNLOCK:
159 return 1; 160 return 1;
160 case -DLM_ECANCEL: 161 case -DLM_ECANCEL:
161 case -ETIMEDOUT: 162 case -ETIMEDOUT:
162 case -EDEADLK: 163 case -EDEADLK:
163 if (lkb->lkb_grmode == DLM_LOCK_IV)
164 return 1;
165 break;
166 case -EAGAIN: 164 case -EAGAIN:
167 if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV) 165 if (mode == DLM_LOCK_IV)
168 return 1; 166 return 1;
169 break; 167 break;
170 } 168 }
@@ -174,12 +172,13 @@ static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
174/* we could possibly check if the cancel of an orphan has resulted in the lkb 172/* we could possibly check if the cancel of an orphan has resulted in the lkb
175 being removed and then remove that lkb from the orphans list and free it */ 173 being removed and then remove that lkb from the orphans list and free it */
176 174
177void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode) 175void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
176 int status, uint32_t sbflags, uint64_t seq)
178{ 177{
179 struct dlm_ls *ls; 178 struct dlm_ls *ls;
180 struct dlm_user_args *ua; 179 struct dlm_user_args *ua;
181 struct dlm_user_proc *proc; 180 struct dlm_user_proc *proc;
182 int eol = 0, ast_type; 181 int rv;
183 182
184 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD)) 183 if (lkb->lkb_flags & (DLM_IFL_ORPHAN | DLM_IFL_DEAD))
185 return; 184 return;
@@ -200,49 +199,29 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode)
200 ua = lkb->lkb_ua; 199 ua = lkb->lkb_ua;
201 proc = ua->proc; 200 proc = ua->proc;
202 201
203 if (type == AST_BAST && ua->bastaddr == NULL) 202 if ((flags & DLM_CB_BAST) && ua->bastaddr == NULL)
204 goto out; 203 goto out;
205 204
205 if ((flags & DLM_CB_CAST) && lkb_is_endoflife(mode, status))
206 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
207
206 spin_lock(&proc->asts_spin); 208 spin_lock(&proc->asts_spin);
207 209
208 ast_type = lkb->lkb_ast_type; 210 rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq);
209 lkb->lkb_ast_type |= type; 211 if (rv < 0) {
210 if (type == AST_BAST) 212 spin_unlock(&proc->asts_spin);
211 lkb->lkb_bastmode = mode; 213 goto out;
212 else 214 }
213 lkb->lkb_castmode = mode;
214 215
215 if (!ast_type) { 216 if (list_empty(&lkb->lkb_astqueue)) {
216 kref_get(&lkb->lkb_ref); 217 kref_get(&lkb->lkb_ref);
217 list_add_tail(&lkb->lkb_astqueue, &proc->asts); 218 list_add_tail(&lkb->lkb_astqueue, &proc->asts);
218 lkb->lkb_ast_first = type;
219 wake_up_interruptible(&proc->wait); 219 wake_up_interruptible(&proc->wait);
220 } 220 }
221 if (type == AST_COMP && (ast_type & AST_COMP))
222 log_debug(ls, "ast overlap %x status %x %x",
223 lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
224
225 eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
226 if (eol) {
227 lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
228 }
229
230 /* We want to copy the lvb to userspace when the completion
231 ast is read if the status is 0, the lock has an lvb and
232 lvb_ops says we should. We could probably have set_lvb_lock()
233 set update_user_lvb instead and not need old_mode */
234
235 if ((lkb->lkb_ast_type & AST_COMP) &&
236 (lkb->lkb_lksb->sb_status == 0) &&
237 lkb->lkb_lksb->sb_lvbptr &&
238 dlm_lvb_operations[ua->old_mode + 1][lkb->lkb_grmode + 1])
239 ua->update_user_lvb = 1;
240 else
241 ua->update_user_lvb = 0;
242
243 spin_unlock(&proc->asts_spin); 221 spin_unlock(&proc->asts_spin);
244 222
245 if (eol) { 223 if (lkb->lkb_flags & DLM_IFL_ENDOFLIFE) {
224 /* N.B. spin_lock locks_spin, not asts_spin */
246 spin_lock(&proc->locks_spin); 225 spin_lock(&proc->locks_spin);
247 if (!list_empty(&lkb->lkb_ownqueue)) { 226 if (!list_empty(&lkb->lkb_ownqueue)) {
248 list_del_init(&lkb->lkb_ownqueue); 227 list_del_init(&lkb->lkb_ownqueue);
@@ -705,8 +684,9 @@ static int device_close(struct inode *inode, struct file *file)
705 return 0; 684 return 0;
706} 685}
707 686
708static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type, 687static int copy_result_to_user(struct dlm_user_args *ua, int compat,
709 int mode, char __user *buf, size_t count) 688 uint32_t flags, int mode, int copy_lvb,
689 char __user *buf, size_t count)
710{ 690{
711#ifdef CONFIG_COMPAT 691#ifdef CONFIG_COMPAT
712 struct dlm_lock_result32 result32; 692 struct dlm_lock_result32 result32;
@@ -730,7 +710,7 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
730 notes that a new blocking AST address and parameter are set even if 710 notes that a new blocking AST address and parameter are set even if
731 the conversion fails, so maybe we should just do that. */ 711 the conversion fails, so maybe we should just do that. */
732 712
733 if (type == AST_BAST) { 713 if (flags & DLM_CB_BAST) {
734 result.user_astaddr = ua->bastaddr; 714 result.user_astaddr = ua->bastaddr;
735 result.user_astparam = ua->bastparam; 715 result.user_astparam = ua->bastparam;
736 result.bast_mode = mode; 716 result.bast_mode = mode;
@@ -750,8 +730,7 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
750 /* copy lvb to userspace if there is one, it's been updated, and 730 /* copy lvb to userspace if there is one, it's been updated, and
751 the user buffer has space for it */ 731 the user buffer has space for it */
752 732
753 if (ua->update_user_lvb && ua->lksb.sb_lvbptr && 733 if (copy_lvb && ua->lksb.sb_lvbptr && count >= len + DLM_USER_LVB_LEN) {
754 count >= len + DLM_USER_LVB_LEN) {
755 if (copy_to_user(buf+len, ua->lksb.sb_lvbptr, 734 if (copy_to_user(buf+len, ua->lksb.sb_lvbptr,
756 DLM_USER_LVB_LEN)) { 735 DLM_USER_LVB_LEN)) {
757 error = -EFAULT; 736 error = -EFAULT;
@@ -801,13 +780,12 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
801 struct dlm_user_proc *proc = file->private_data; 780 struct dlm_user_proc *proc = file->private_data;
802 struct dlm_lkb *lkb; 781 struct dlm_lkb *lkb;
803 DECLARE_WAITQUEUE(wait, current); 782 DECLARE_WAITQUEUE(wait, current);
804 int error = 0, removed; 783 struct dlm_callback cb;
805 int ret_type, ret_mode; 784 int rv, resid, copy_lvb = 0;
806 int bastmode, castmode, do_bast, do_cast;
807 785
808 if (count == sizeof(struct dlm_device_version)) { 786 if (count == sizeof(struct dlm_device_version)) {
809 error = copy_version_to_user(buf, count); 787 rv = copy_version_to_user(buf, count);
810 return error; 788 return rv;
811 } 789 }
812 790
813 if (!proc) { 791 if (!proc) {
@@ -854,92 +832,57 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
854 } 832 }
855 } 833 }
856 834
857 /* there may be both completion and blocking asts to return for 835 /* if we empty lkb_callbacks, we don't want to unlock the spinlock
858 the lkb, don't remove lkb from asts list unless no asts remain */ 836 without removing lkb_astqueue; so empty lkb_astqueue is always
837 consistent with empty lkb_callbacks */
859 838
860 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); 839 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
861 840
862 removed = 0; 841 rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
863 ret_type = 0; 842 if (rv < 0) {
864 ret_mode = 0; 843 /* this shouldn't happen; lkb should have been removed from
865 do_bast = lkb->lkb_ast_type & AST_BAST; 844 list when resid was zero */
866 do_cast = lkb->lkb_ast_type & AST_COMP; 845 log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id);
867 bastmode = lkb->lkb_bastmode; 846 list_del_init(&lkb->lkb_astqueue);
868 castmode = lkb->lkb_castmode; 847 spin_unlock(&proc->asts_spin);
869 848 /* removes ref for proc->asts, may cause lkb to be freed */
870 /* when both are queued figure out which to do first and 849 dlm_put_lkb(lkb);
871 switch first so the other goes in the next read */ 850 goto try_another;
872
873 if (do_cast && do_bast) {
874 if (lkb->lkb_ast_first == AST_COMP) {
875 ret_type = AST_COMP;
876 ret_mode = castmode;
877 lkb->lkb_ast_type &= ~AST_COMP;
878 lkb->lkb_ast_first = AST_BAST;
879 } else {
880 ret_type = AST_BAST;
881 ret_mode = bastmode;
882 lkb->lkb_ast_type &= ~AST_BAST;
883 lkb->lkb_ast_first = AST_COMP;
884 }
885 } else {
886 ret_type = lkb->lkb_ast_first;
887 ret_mode = (ret_type == AST_COMP) ? castmode : bastmode;
888 lkb->lkb_ast_type &= ~ret_type;
889 lkb->lkb_ast_first = 0;
890 } 851 }
852 if (!resid)
853 list_del_init(&lkb->lkb_astqueue);
854 spin_unlock(&proc->asts_spin);
891 855
892 /* if we're doing a bast but the bast is unnecessary, then 856 if (cb.flags & DLM_CB_SKIP) {
893 switch to do nothing or do a cast if that was needed next */ 857 /* removes ref for proc->asts, may cause lkb to be freed */
894 858 if (!resid)
895 if ((ret_type == AST_BAST) && 859 dlm_put_lkb(lkb);
896 dlm_modes_compat(bastmode, lkb->lkb_castmode_done)) { 860 goto try_another;
897 ret_type = 0;
898 ret_mode = 0;
899
900 if (do_cast) {
901 ret_type = AST_COMP;
902 ret_mode = castmode;
903 lkb->lkb_ast_type &= ~AST_COMP;
904 lkb->lkb_ast_first = 0;
905 }
906 } 861 }
907 862
908 if (lkb->lkb_ast_first != lkb->lkb_ast_type) { 863 if (cb.flags & DLM_CB_CAST) {
909 log_print("device_read %x ast_first %x ast_type %x", 864 int old_mode, new_mode;
910 lkb->lkb_id, lkb->lkb_ast_first, lkb->lkb_ast_type);
911 }
912 865
913 if (!lkb->lkb_ast_type) { 866 old_mode = lkb->lkb_last_cast.mode;
914 list_del(&lkb->lkb_astqueue); 867 new_mode = cb.mode;
915 removed = 1;
916 }
917 spin_unlock(&proc->asts_spin);
918 868
919 if (ret_type) { 869 if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr &&
920 error = copy_result_to_user(lkb->lkb_ua, 870 dlm_lvb_operations[old_mode + 1][new_mode + 1])
921 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags), 871 copy_lvb = 1;
922 ret_type, ret_mode, buf, count);
923 872
924 if (ret_type == AST_COMP) 873 lkb->lkb_lksb->sb_status = cb.sb_status;
925 lkb->lkb_castmode_done = castmode; 874 lkb->lkb_lksb->sb_flags = cb.sb_flags;
926 if (ret_type == AST_BAST)
927 lkb->lkb_bastmode_done = bastmode;
928 } 875 }
929 876
930 /* removes reference for the proc->asts lists added by 877 rv = copy_result_to_user(lkb->lkb_ua,
931 dlm_user_add_ast() and may result in the lkb being freed */ 878 test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
879 cb.flags, cb.mode, copy_lvb, buf, count);
932 880
933 if (removed) 881 /* removes ref for proc->asts, may cause lkb to be freed */
882 if (!resid)
934 dlm_put_lkb(lkb); 883 dlm_put_lkb(lkb);
935 884
936 /* the bast that was queued was eliminated (see unnecessary above), 885 return rv;
937 leaving nothing to return */
938
939 if (!ret_type)
940 goto try_another;
941
942 return error;
943} 886}
944 887
945static unsigned int device_poll(struct file *file, poll_table *wait) 888static unsigned int device_poll(struct file *file, poll_table *wait)
diff --git a/fs/dlm/user.h b/fs/dlm/user.h
index f196091dd7ff..00499ab8835f 100644
--- a/fs/dlm/user.h
+++ b/fs/dlm/user.h
@@ -9,7 +9,8 @@
9#ifndef __USER_DOT_H__ 9#ifndef __USER_DOT_H__
10#define __USER_DOT_H__ 10#define __USER_DOT_H__
11 11
12void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode); 12void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
13 int status, uint32_t sbflags, uint64_t seq);
13int dlm_user_init(void); 14int dlm_user_init(void);
14void dlm_user_exit(void); 15void dlm_user_exit(void);
15int dlm_device_deregister(struct dlm_ls *ls); 16int dlm_device_deregister(struct dlm_ls *ls);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 2195c213ab2f..816f88e6b9ce 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -45,7 +45,11 @@ static void drop_slab(void)
45int drop_caches_sysctl_handler(ctl_table *table, int write, 45int drop_caches_sysctl_handler(ctl_table *table, int write,
46 void __user *buffer, size_t *length, loff_t *ppos) 46 void __user *buffer, size_t *length, loff_t *ppos)
47{ 47{
48 proc_dointvec_minmax(table, write, buffer, length, ppos); 48 int ret;
49
50 ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
51 if (ret)
52 return ret;
49 if (write) { 53 if (write) {
50 if (sysctl_drop_caches & 1) 54 if (sysctl_drop_caches & 1)
51 iterate_supers(drop_pagecache_sb, NULL); 55 iterate_supers(drop_pagecache_sb, NULL);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4a09af9e9a63..ed38801b57a7 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -62,7 +62,7 @@
62 * This mutex is acquired by ep_free() during the epoll file 62 * This mutex is acquired by ep_free() during the epoll file
63 * cleanup path and it is also acquired by eventpoll_release_file() 63 * cleanup path and it is also acquired by eventpoll_release_file()
64 * if a file has been pushed inside an epoll set and it is then 64 * if a file has been pushed inside an epoll set and it is then
65 * close()d without a previous call toepoll_ctl(EPOLL_CTL_DEL). 65 * close()d without a previous call to epoll_ctl(EPOLL_CTL_DEL).
66 * It is also acquired when inserting an epoll fd onto another epoll 66 * It is also acquired when inserting an epoll fd onto another epoll
67 * fd. We do this so that we walk the epoll tree and ensure that this 67 * fd. We do this so that we walk the epoll tree and ensure that this
68 * insertion does not create a cycle of epoll file descriptors, which 68 * insertion does not create a cycle of epoll file descriptors, which
@@ -152,11 +152,11 @@ struct epitem {
152 152
153/* 153/*
154 * This structure is stored inside the "private_data" member of the file 154 * This structure is stored inside the "private_data" member of the file
155 * structure and rapresent the main data sructure for the eventpoll 155 * structure and represents the main data structure for the eventpoll
156 * interface. 156 * interface.
157 */ 157 */
158struct eventpoll { 158struct eventpoll {
159 /* Protect the this structure access */ 159 /* Protect the access to this structure */
160 spinlock_t lock; 160 spinlock_t lock;
161 161
162 /* 162 /*
@@ -316,6 +316,19 @@ static void ep_nested_calls_init(struct nested_calls *ncalls)
316} 316}
317 317
318/** 318/**
319 * ep_events_available - Checks if ready events might be available.
320 *
321 * @ep: Pointer to the eventpoll context.
322 *
323 * Returns: Returns a value different than zero if ready events are available,
324 * or zero otherwise.
325 */
326static inline int ep_events_available(struct eventpoll *ep)
327{
328 return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
329}
330
331/**
319 * ep_call_nested - Perform a bound (possibly) nested call, by checking 332 * ep_call_nested - Perform a bound (possibly) nested call, by checking
320 * that the recursion limit is not exceeded, and that 333 * that the recursion limit is not exceeded, and that
321 * the same nested call (by the meaning of same cookie) is 334 * the same nested call (by the meaning of same cookie) is
@@ -793,7 +806,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
793 806
794/* 807/*
795 * This is the callback that is passed to the wait queue wakeup 808 * This is the callback that is passed to the wait queue wakeup
796 * machanism. It is called by the stored file descriptors when they 809 * mechanism. It is called by the stored file descriptors when they
797 * have events to report. 810 * have events to report.
798 */ 811 */
799static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key) 812static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *key)
@@ -824,9 +837,9 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
824 goto out_unlock; 837 goto out_unlock;
825 838
826 /* 839 /*
827 * If we are trasfering events to userspace, we can hold no locks 840 * If we are transferring events to userspace, we can hold no locks
828 * (because we're accessing user memory, and because of linux f_op->poll() 841 * (because we're accessing user memory, and because of linux f_op->poll()
829 * semantics). All the events that happens during that period of time are 842 * semantics). All the events that happen during that period of time are
830 * chained in ep->ovflist and requeued later on. 843 * chained in ep->ovflist and requeued later on.
831 */ 844 */
832 if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { 845 if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) {
@@ -1135,12 +1148,29 @@ static inline struct timespec ep_set_mstimeout(long ms)
1135 return timespec_add_safe(now, ts); 1148 return timespec_add_safe(now, ts);
1136} 1149}
1137 1150
1151/**
1152 * ep_poll - Retrieves ready events, and delivers them to the caller supplied
1153 * event buffer.
1154 *
1155 * @ep: Pointer to the eventpoll context.
1156 * @events: Pointer to the userspace buffer where the ready events should be
1157 * stored.
1158 * @maxevents: Size (in terms of number of events) of the caller event buffer.
1159 * @timeout: Maximum timeout for the ready events fetch operation, in
1160 * milliseconds. If the @timeout is zero, the function will not block,
1161 * while if the @timeout is less than zero, the function will block
1162 * until at least one event has been retrieved (or an error
1163 * occurred).
1164 *
1165 * Returns: Returns the number of ready events which have been fetched, or an
1166 * error code, in case of error.
1167 */
1138static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, 1168static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1139 int maxevents, long timeout) 1169 int maxevents, long timeout)
1140{ 1170{
1141 int res, eavail, timed_out = 0; 1171 int res = 0, eavail, timed_out = 0;
1142 unsigned long flags; 1172 unsigned long flags;
1143 long slack; 1173 long slack = 0;
1144 wait_queue_t wait; 1174 wait_queue_t wait;
1145 ktime_t expires, *to = NULL; 1175 ktime_t expires, *to = NULL;
1146 1176
@@ -1151,14 +1181,19 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
1151 to = &expires; 1181 to = &expires;
1152 *to = timespec_to_ktime(end_time); 1182 *to = timespec_to_ktime(end_time);
1153 } else if (timeout == 0) { 1183 } else if (timeout == 0) {
1184 /*
1185 * Avoid the unnecessary trip to the wait queue loop, if the
1186 * caller specified a non blocking operation.
1187 */
1154 timed_out = 1; 1188 timed_out = 1;
1189 spin_lock_irqsave(&ep->lock, flags);
1190 goto check_events;
1155 } 1191 }
1156 1192
1157retry: 1193fetch_events:
1158 spin_lock_irqsave(&ep->lock, flags); 1194 spin_lock_irqsave(&ep->lock, flags);
1159 1195
1160 res = 0; 1196 if (!ep_events_available(ep)) {
1161 if (list_empty(&ep->rdllist)) {
1162 /* 1197 /*
1163 * We don't have any available event to return to the caller. 1198 * We don't have any available event to return to the caller.
1164 * We need to sleep here, and we will be wake up by 1199 * We need to sleep here, and we will be wake up by
@@ -1174,7 +1209,7 @@ retry:
1174 * to TASK_INTERRUPTIBLE before doing the checks. 1209 * to TASK_INTERRUPTIBLE before doing the checks.
1175 */ 1210 */
1176 set_current_state(TASK_INTERRUPTIBLE); 1211 set_current_state(TASK_INTERRUPTIBLE);
1177 if (!list_empty(&ep->rdllist) || timed_out) 1212 if (ep_events_available(ep) || timed_out)
1178 break; 1213 break;
1179 if (signal_pending(current)) { 1214 if (signal_pending(current)) {
1180 res = -EINTR; 1215 res = -EINTR;
@@ -1191,8 +1226,9 @@ retry:
1191 1226
1192 set_current_state(TASK_RUNNING); 1227 set_current_state(TASK_RUNNING);
1193 } 1228 }
1229check_events:
1194 /* Is it worth to try to dig for events ? */ 1230 /* Is it worth to try to dig for events ? */
1195 eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; 1231 eavail = ep_events_available(ep);
1196 1232
1197 spin_unlock_irqrestore(&ep->lock, flags); 1233 spin_unlock_irqrestore(&ep->lock, flags);
1198 1234
@@ -1203,7 +1239,7 @@ retry:
1203 */ 1239 */
1204 if (!res && eavail && 1240 if (!res && eavail &&
1205 !(res = ep_send_events(ep, events, maxevents)) && !timed_out) 1241 !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
1206 goto retry; 1242 goto fetch_events;
1207 1243
1208 return res; 1244 return res;
1209} 1245}
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6ab..5e62d26a4fec 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
115 struct file *file; 115 struct file *file;
116 char *tmp = getname(library); 116 char *tmp = getname(library);
117 int error = PTR_ERR(tmp); 117 int error = PTR_ERR(tmp);
118 static const struct open_flags uselib_flags = {
119 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
120 .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN,
121 .intent = LOOKUP_OPEN
122 };
118 123
119 if (IS_ERR(tmp)) 124 if (IS_ERR(tmp))
120 goto out; 125 goto out;
121 126
122 file = do_filp_open(AT_FDCWD, tmp, 127 file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW);
123 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
124 MAY_READ | MAY_EXEC | MAY_OPEN);
125 putname(tmp); 128 putname(tmp);
126 error = PTR_ERR(file); 129 error = PTR_ERR(file);
127 if (IS_ERR(file)) 130 if (IS_ERR(file))
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name)
721{ 724{
722 struct file *file; 725 struct file *file;
723 int err; 726 int err;
727 static const struct open_flags open_exec_flags = {
728 .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
729 .acc_mode = MAY_EXEC | MAY_OPEN,
730 .intent = LOOKUP_OPEN
731 };
724 732
725 file = do_filp_open(AT_FDCWD, name, 733 file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW);
726 O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0,
727 MAY_EXEC | MAY_OPEN);
728 if (IS_ERR(file)) 734 if (IS_ERR(file))
729 goto out; 735 goto out;
730 736
@@ -1869,7 +1875,7 @@ static void wait_for_dump_helpers(struct file *file)
1869 1875
1870 1876
1871/* 1877/*
1872 * uhm_pipe_setup 1878 * umh_pipe_setup
1873 * helper function to customize the process used 1879 * helper function to customize the process used
1874 * to collect the core in userspace. Specifically 1880 * to collect the core in userspace. Specifically
1875 * it sets up a pipe and installs it as fd 0 (stdin) 1881 * it sets up a pipe and installs it as fd 0 (stdin)
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index f0d520312d8b..5e74ad3d4009 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -53,10 +53,14 @@
53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */ 53#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
54 54
55/* exofs Application specific page/attribute */ 55/* exofs Application specific page/attribute */
56/* Inode attrs */
56# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3) 57# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
57# define EXOFS_ATTR_INODE_DATA 1 58# define EXOFS_ATTR_INODE_DATA 1
58# define EXOFS_ATTR_INODE_FILE_LAYOUT 2 59# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
59# define EXOFS_ATTR_INODE_DIR_LAYOUT 3 60# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
61/* Partition attrs */
62# define EXOFS_APAGE_SB_DATA (0xF0000000U + 3)
63# define EXOFS_ATTR_SB_STATS 1
60 64
61/* 65/*
62 * The maximum number of files we can have is limited by the size of the 66 * The maximum number of files we can have is limited by the size of the
@@ -86,8 +90,8 @@ enum {
86 */ 90 */
87enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1}; 91enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
88struct exofs_fscb { 92struct exofs_fscb {
89 __le64 s_nextid; /* Highest object ID used */ 93 __le64 s_nextid; /* Only used after mkfs */
90 __le64 s_numfiles; /* Number of files on fs */ 94 __le64 s_numfiles; /* Only used after mkfs */
91 __le32 s_version; /* == EXOFS_FSCB_VER */ 95 __le32 s_version; /* == EXOFS_FSCB_VER */
92 __le16 s_magic; /* Magic signature */ 96 __le16 s_magic; /* Magic signature */
93 __le16 s_newfs; /* Non-zero if this is a new fs */ 97 __le16 s_newfs; /* Non-zero if this is a new fs */
@@ -98,6 +102,16 @@ struct exofs_fscb {
98} __packed; 102} __packed;
99 103
100/* 104/*
105 * This struct is set on the FS partition's attributes.
106 * [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] and is written together
107 * with the create command, to atomically persist the sb writeable information.
108 */
109struct exofs_sb_stats {
110 __le64 s_nextid; /* Highest object ID used */
111 __le64 s_numfiles; /* Number of files on fs */
112} __packed;
113
114/*
101 * Describes the raid used in the FS. It is part of the device table. 115 * Describes the raid used in the FS. It is part of the device table.
102 * This here is taken from the pNFS-objects definition. In exofs we 116 * This here is taken from the pNFS-objects definition. In exofs we
103 * use one raid policy through-out the filesystem. (NOTE: the funny 117 * use one raid policy through-out the filesystem. (NOTE: the funny
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index dcc941d82d67..d0941c6a1f72 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -124,7 +124,7 @@ out:
124 124
125Ebadsize: 125Ebadsize:
126 EXOFS_ERR("ERROR [exofs_check_page]: " 126 EXOFS_ERR("ERROR [exofs_check_page]: "
127 "size of directory #%lu is not a multiple of chunk size", 127 "size of directory(0x%lx) is not a multiple of chunk size\n",
128 dir->i_ino 128 dir->i_ino
129 ); 129 );
130 goto fail; 130 goto fail;
@@ -142,8 +142,8 @@ Espan:
142 goto bad_entry; 142 goto bad_entry;
143bad_entry: 143bad_entry:
144 EXOFS_ERR( 144 EXOFS_ERR(
145 "ERROR [exofs_check_page]: bad entry in directory #%lu: %s - " 145 "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
146 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d", 146 "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, 147 dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
148 _LLU(le64_to_cpu(p->inode_no)), 148 _LLU(le64_to_cpu(p->inode_no)),
149 rec_len, p->name_len); 149 rec_len, p->name_len);
@@ -151,8 +151,8 @@ bad_entry:
151Eend: 151Eend:
152 p = (struct exofs_dir_entry *)(kaddr + offs); 152 p = (struct exofs_dir_entry *)(kaddr + offs);
153 EXOFS_ERR("ERROR [exofs_check_page]: " 153 EXOFS_ERR("ERROR [exofs_check_page]: "
154 "entry in directory #%lu spans the page boundary" 154 "entry in directory(0x%lx) spans the page boundary"
155 "offset=%lu, inode=%llu", 155 "offset=%lu, inode=0x%llx\n",
156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, 156 dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
157 _LLU(le64_to_cpu(p->inode_no))); 157 _LLU(le64_to_cpu(p->inode_no)));
158fail: 158fail:
@@ -261,9 +261,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
261 struct page *page = exofs_get_page(inode, n); 261 struct page *page = exofs_get_page(inode, n);
262 262
263 if (IS_ERR(page)) { 263 if (IS_ERR(page)) {
264 EXOFS_ERR("ERROR: " 264 EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
265 "bad page in #%lu", 265 inode->i_ino);
266 inode->i_ino);
267 filp->f_pos += PAGE_CACHE_SIZE - offset; 266 filp->f_pos += PAGE_CACHE_SIZE - offset;
268 return PTR_ERR(page); 267 return PTR_ERR(page);
269 } 268 }
@@ -283,7 +282,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
283 for (; (char *)de <= limit; de = exofs_next_entry(de)) { 282 for (; (char *)de <= limit; de = exofs_next_entry(de)) {
284 if (de->rec_len == 0) { 283 if (de->rec_len == 0) {
285 EXOFS_ERR("ERROR: " 284 EXOFS_ERR("ERROR: "
286 "zero-length directory entry"); 285 "zero-length entry in directory(0x%lx)\n",
286 inode->i_ino);
287 exofs_put_page(page); 287 exofs_put_page(page);
288 return -EIO; 288 return -EIO;
289 } 289 }
@@ -342,9 +342,9 @@ struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
342 kaddr += exofs_last_byte(dir, n) - reclen; 342 kaddr += exofs_last_byte(dir, n) - reclen;
343 while ((char *) de <= kaddr) { 343 while ((char *) de <= kaddr) {
344 if (de->rec_len == 0) { 344 if (de->rec_len == 0) {
345 EXOFS_ERR( 345 EXOFS_ERR("ERROR: zero-length entry in "
346 "ERROR: exofs_find_entry: " 346 "directory(0x%lx)\n",
347 "zero-length directory entry"); 347 dir->i_ino);
348 exofs_put_page(page); 348 exofs_put_page(page);
349 goto out; 349 goto out;
350 } 350 }
@@ -472,7 +472,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
472 } 472 }
473 if (de->rec_len == 0) { 473 if (de->rec_len == 0) {
474 EXOFS_ERR("ERROR: exofs_add_link: " 474 EXOFS_ERR("ERROR: exofs_add_link: "
475 "zero-length directory entry"); 475 "zero-length entry in directory(0x%lx)\n",
476 inode->i_ino);
476 err = -EIO; 477 err = -EIO;
477 goto out_unlock; 478 goto out_unlock;
478 } 479 }
@@ -491,7 +492,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
491 exofs_put_page(page); 492 exofs_put_page(page);
492 } 493 }
493 494
494 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode); 495 EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=0x%lx\n",
496 dentry, inode->i_ino);
495 return -EINVAL; 497 return -EINVAL;
496 498
497got_it: 499got_it:
@@ -542,7 +544,8 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
542 while (de < dir) { 544 while (de < dir) {
543 if (de->rec_len == 0) { 545 if (de->rec_len == 0) {
544 EXOFS_ERR("ERROR: exofs_delete_entry:" 546 EXOFS_ERR("ERROR: exofs_delete_entry:"
545 "zero-length directory entry"); 547 "zero-length entry in directory(0x%lx)\n",
548 inode->i_ino);
546 err = -EIO; 549 err = -EIO;
547 goto out; 550 goto out;
548 } 551 }
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 2dc925fa1010..c965806c2821 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -77,7 +77,7 @@ struct exofs_layout {
77 * our extension to the in-memory superblock 77 * our extension to the in-memory superblock
78 */ 78 */
79struct exofs_sb_info { 79struct exofs_sb_info {
80 struct exofs_fscb s_fscb; /* Written often, pre-allocate*/ 80 struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
81 int s_timeout; /* timeout for OSD operations */ 81 int s_timeout; /* timeout for OSD operations */
82 uint64_t s_nextid; /* highest object ID used */ 82 uint64_t s_nextid; /* highest object ID used */
83 uint32_t s_numfiles; /* number of files on fs */ 83 uint32_t s_numfiles; /* number of files on fs */
@@ -256,6 +256,8 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
256} 256}
257 257
258/* inode.c */ 258/* inode.c */
259unsigned exofs_max_io_pages(struct exofs_layout *layout,
260 unsigned expected_pages);
259int exofs_setattr(struct dentry *, struct iattr *); 261int exofs_setattr(struct dentry *, struct iattr *);
260int exofs_write_begin(struct file *file, struct address_space *mapping, 262int exofs_write_begin(struct file *file, struct address_space *mapping,
261 loff_t pos, unsigned len, unsigned flags, 263 loff_t pos, unsigned len, unsigned flags,
@@ -279,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
279 struct inode *); 281 struct inode *);
280 282
281/* super.c */ 283/* super.c */
282int exofs_sync_fs(struct super_block *sb, int wait); 284int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
283 285
284/********************* 286/*********************
285 * operation vectors * 287 * operation vectors *
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index b905c79b4f0a..45ca323d8363 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -45,22 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
45static int exofs_file_fsync(struct file *filp, int datasync) 45static int exofs_file_fsync(struct file *filp, int datasync)
46{ 46{
47 int ret; 47 int ret;
48 struct inode *inode = filp->f_mapping->host;
49 struct super_block *sb;
50
51 if (!(inode->i_state & I_DIRTY))
52 return 0;
53 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
54 return 0;
55
56 ret = sync_inode_metadata(inode, 1);
57
58 /* This is a good place to write the sb */
59 /* TODO: Sechedule an sb-sync on create */
60 sb = inode->i_sb;
61 if (sb->s_dirt)
62 exofs_sync_fs(sb, 1);
63 48
49 ret = sync_inode_metadata(filp->f_mapping->host, 1);
64 return ret; 50 return ret;
65} 51}
66 52
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index a7555238c41a..0c713cfbebf0 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
43 PAGE_SIZE / sizeof(struct page *), 43 PAGE_SIZE / sizeof(struct page *),
44}; 44};
45 45
46unsigned exofs_max_io_pages(struct exofs_layout *layout,
47 unsigned expected_pages)
48{
49 unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
50
51 /* TODO: easily support bio chaining */
52 pages = min_t(unsigned, pages,
53 layout->group_width * BIO_MAX_PAGES_KMALLOC);
54 return pages;
55}
56
46struct page_collect { 57struct page_collect {
47 struct exofs_sb_info *sbi; 58 struct exofs_sb_info *sbi;
48 struct inode *inode; 59 struct inode *inode;
@@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
97 108
98static int pcol_try_alloc(struct page_collect *pcol) 109static int pcol_try_alloc(struct page_collect *pcol)
99{ 110{
100 unsigned pages = min_t(unsigned, pcol->expected_pages, 111 unsigned pages;
101 MAX_PAGES_KMALLOC);
102 112
103 if (!pcol->ios) { /* First time allocate io_state */ 113 if (!pcol->ios) { /* First time allocate io_state */
104 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios); 114 int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
@@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
108 } 118 }
109 119
110 /* TODO: easily support bio chaining */ 120 /* TODO: easily support bio chaining */
111 pages = min_t(unsigned, pages, 121 pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
112 pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
113 122
114 for (; pages; pages >>= 1) { 123 for (; pages; pages >>= 1) {
115 pcol->pages = kmalloc(pages * sizeof(struct page *), 124 pcol->pages = kmalloc(pages * sizeof(struct page *),
@@ -350,8 +359,10 @@ static int readpage_strip(void *data, struct page *page)
350 359
351 if (!pcol->read_4_write) 360 if (!pcol->read_4_write)
352 unlock_page(page); 361 unlock_page(page);
353 EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," 362 EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
354 " splitting\n", inode->i_ino, page->index); 363 "read_4_write=%d index=0x%lx end_index=0x%lx "
364 "splitting\n", inode->i_ino, len,
365 pcol->read_4_write, page->index, end_index);
355 366
356 return read_exec(pcol); 367 return read_exec(pcol);
357 } 368 }
@@ -722,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
722 733
723 /* read modify write */ 734 /* read modify write */
724 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) { 735 if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
736 loff_t i_size = i_size_read(mapping->host);
737 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
738 size_t rlen;
739
740 if (page->index < end_index)
741 rlen = PAGE_CACHE_SIZE;
742 else if (page->index == end_index)
743 rlen = i_size & ~PAGE_CACHE_MASK;
744 else
745 rlen = 0;
746
747 if (!rlen) {
748 clear_highpage(page);
749 SetPageUptodate(page);
750 goto out;
751 }
752
725 ret = _readpage(page, true); 753 ret = _readpage(page, true);
726 if (ret) { 754 if (ret) {
727 /*SetPageError was done by _readpage. Is it ok?*/ 755 /*SetPageError was done by _readpage. Is it ok?*/
728 unlock_page(page); 756 unlock_page(page);
729 EXOFS_DBGMSG("__readpage_filler failed\n"); 757 EXOFS_DBGMSG("__readpage failed\n");
730 } 758 }
731 } 759 }
732out: 760out:
@@ -1030,6 +1058,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
1030 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); 1058 memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
1031 } 1059 }
1032 1060
1061 inode->i_mapping->backing_dev_info = sb->s_bdi;
1033 if (S_ISREG(inode->i_mode)) { 1062 if (S_ISREG(inode->i_mode)) {
1034 inode->i_op = &exofs_file_inode_operations; 1063 inode->i_op = &exofs_file_inode_operations;
1035 inode->i_fop = &exofs_file_operations; 1064 inode->i_fop = &exofs_file_operations;
@@ -1073,6 +1102,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
1073 } 1102 }
1074 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; 1103 return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
1075} 1104}
1105
1076/* 1106/*
1077 * Callback function from exofs_new_inode(). The important thing is that we 1107 * Callback function from exofs_new_inode(). The important thing is that we
1078 * set the obj_created flag so that other methods know that the object exists on 1108 * set the obj_created flag so that other methods know that the object exists on
@@ -1130,7 +1160,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1130 1160
1131 sbi = sb->s_fs_info; 1161 sbi = sb->s_fs_info;
1132 1162
1133 sb->s_dirt = 1; 1163 inode->i_mapping->backing_dev_info = sb->s_bdi;
1134 inode_init_owner(inode, dir, mode); 1164 inode_init_owner(inode, dir, mode);
1135 inode->i_ino = sbi->s_nextid++; 1165 inode->i_ino = sbi->s_nextid++;
1136 inode->i_blkbits = EXOFS_BLKSHIFT; 1166 inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1141,6 +1171,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
1141 spin_unlock(&sbi->s_next_gen_lock); 1171 spin_unlock(&sbi->s_next_gen_lock);
1142 insert_inode_hash(inode); 1172 insert_inode_hash(inode);
1143 1173
1174 exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
1175
1144 mark_inode_dirty(inode); 1176 mark_inode_dirty(inode);
1145 1177
1146 ret = exofs_get_io_state(&sbi->layout, &ios); 1178 ret = exofs_get_io_state(&sbi->layout, &ios);
@@ -1271,7 +1303,8 @@ out:
1271 1303
1272int exofs_write_inode(struct inode *inode, struct writeback_control *wbc) 1304int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
1273{ 1305{
1274 return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 1306 /* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
1307 return exofs_update_inode(inode, 1);
1275} 1308}
1276 1309
1277/* 1310/*
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 264e95d02830..4d70db110cfc 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -272,7 +272,6 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page); 272 new_de = exofs_find_entry(new_dir, new_dentry, &new_page);
273 if (!new_de) 273 if (!new_de)
274 goto out_dir; 274 goto out_dir;
275 inode_inc_link_count(old_inode);
276 err = exofs_set_link(new_dir, new_de, new_page, old_inode); 275 err = exofs_set_link(new_dir, new_de, new_page, old_inode);
277 new_inode->i_ctime = CURRENT_TIME; 276 new_inode->i_ctime = CURRENT_TIME;
278 if (dir_de) 277 if (dir_de)
@@ -286,12 +285,9 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
286 if (new_dir->i_nlink >= EXOFS_LINK_MAX) 285 if (new_dir->i_nlink >= EXOFS_LINK_MAX)
287 goto out_dir; 286 goto out_dir;
288 } 287 }
289 inode_inc_link_count(old_inode);
290 err = exofs_add_link(new_dentry, old_inode); 288 err = exofs_add_link(new_dentry, old_inode);
291 if (err) { 289 if (err)
292 inode_dec_link_count(old_inode);
293 goto out_dir; 290 goto out_dir;
294 }
295 if (dir_de) 291 if (dir_de)
296 inode_inc_link_count(new_dir); 292 inode_inc_link_count(new_dir);
297 } 293 }
@@ -299,7 +295,7 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
299 old_inode->i_ctime = CURRENT_TIME; 295 old_inode->i_ctime = CURRENT_TIME;
300 296
301 exofs_delete_entry(old_de, old_page); 297 exofs_delete_entry(old_de, old_page);
302 inode_dec_link_count(old_inode); 298 mark_inode_dirty(old_inode);
303 299
304 if (dir_de) { 300 if (dir_de) {
305 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir); 301 err = exofs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 8c6c4669b381..06065bd37fc3 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -48,6 +48,7 @@
48 * struct to hold what we get from mount options 48 * struct to hold what we get from mount options
49 */ 49 */
50struct exofs_mountopt { 50struct exofs_mountopt {
51 bool is_osdname;
51 const char *dev_name; 52 const char *dev_name;
52 uint64_t pid; 53 uint64_t pid;
53 int timeout; 54 int timeout;
@@ -56,7 +57,7 @@ struct exofs_mountopt {
56/* 57/*
57 * exofs-specific mount-time options. 58 * exofs-specific mount-time options.
58 */ 59 */
59enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err }; 60enum { Opt_name, Opt_pid, Opt_to, Opt_err };
60 61
61/* 62/*
62 * Our mount-time options. These should ideally be 64-bit unsigned, but the 63 * Our mount-time options. These should ideally be 64-bit unsigned, but the
@@ -64,6 +65,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
64 * sufficient for most applications now. 65 * sufficient for most applications now.
65 */ 66 */
66static match_table_t tokens = { 67static match_table_t tokens = {
68 {Opt_name, "osdname=%s"},
67 {Opt_pid, "pid=%u"}, 69 {Opt_pid, "pid=%u"},
68 {Opt_to, "to=%u"}, 70 {Opt_to, "to=%u"},
69 {Opt_err, NULL} 71 {Opt_err, NULL}
@@ -94,6 +96,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
94 96
95 token = match_token(p, tokens, args); 97 token = match_token(p, tokens, args);
96 switch (token) { 98 switch (token) {
99 case Opt_name:
100 opts->dev_name = match_strdup(&args[0]);
101 if (unlikely(!opts->dev_name)) {
102 EXOFS_ERR("Error allocating dev_name");
103 return -ENOMEM;
104 }
105 opts->is_osdname = true;
106 break;
97 case Opt_pid: 107 case Opt_pid:
98 if (0 == match_strlcpy(str, &args[0], sizeof(str))) 108 if (0 == match_strlcpy(str, &args[0], sizeof(str)))
99 return -EINVAL; 109 return -EINVAL;
@@ -203,6 +213,101 @@ static void destroy_inodecache(void)
203static const struct super_operations exofs_sops; 213static const struct super_operations exofs_sops;
204static const struct export_operations exofs_export_ops; 214static const struct export_operations exofs_export_ops;
205 215
216static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
217 EXOFS_APAGE_SB_DATA,
218 EXOFS_ATTR_SB_STATS,
219 sizeof(struct exofs_sb_stats));
220
221static int __sbi_read_stats(struct exofs_sb_info *sbi)
222{
223 struct osd_attr attrs[] = {
224 [0] = g_attr_sb_stats,
225 };
226 struct exofs_io_state *ios;
227 int ret;
228
229 ret = exofs_get_io_state(&sbi->layout, &ios);
230 if (unlikely(ret)) {
231 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
232 return ret;
233 }
234
235 ios->cred = sbi->s_cred;
236
237 ios->in_attr = attrs;
238 ios->in_attr_len = ARRAY_SIZE(attrs);
239
240 ret = exofs_sbi_read(ios);
241 if (unlikely(ret)) {
242 EXOFS_ERR("Error reading super_block stats => %d\n", ret);
243 goto out;
244 }
245
246 ret = extract_attr_from_ios(ios, &attrs[0]);
247 if (ret) {
248 EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__);
249 goto out;
250 }
251 if (attrs[0].len) {
252 struct exofs_sb_stats *ess;
253
254 if (unlikely(attrs[0].len != sizeof(*ess))) {
255 EXOFS_ERR("%s: Wrong version of exofs_sb_stats "
256 "size(%d) != expected(%zd)\n",
257 __func__, attrs[0].len, sizeof(*ess));
258 goto out;
259 }
260
261 ess = attrs[0].val_ptr;
262 sbi->s_nextid = le64_to_cpu(ess->s_nextid);
263 sbi->s_numfiles = le32_to_cpu(ess->s_numfiles);
264 }
265
266out:
267 exofs_put_io_state(ios);
268 return ret;
269}
270
271static void stats_done(struct exofs_io_state *ios, void *p)
272{
273 exofs_put_io_state(ios);
274 /* Good thanks nothing to do anymore */
275}
276
277/* Asynchronously write the stats attribute */
278int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
279{
280 struct osd_attr attrs[] = {
281 [0] = g_attr_sb_stats,
282 };
283 struct exofs_io_state *ios;
284 int ret;
285
286 ret = exofs_get_io_state(&sbi->layout, &ios);
287 if (unlikely(ret)) {
288 EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
289 return ret;
290 }
291
292 sbi->s_ess.s_nextid = cpu_to_le64(sbi->s_nextid);
293 sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
294 attrs[0].val_ptr = &sbi->s_ess;
295
296 ios->cred = sbi->s_cred;
297 ios->done = stats_done;
298 ios->private = sbi;
299 ios->out_attr = attrs;
300 ios->out_attr_len = ARRAY_SIZE(attrs);
301
302 ret = exofs_sbi_write(ios);
303 if (unlikely(ret)) {
304 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
305 exofs_put_io_state(ios);
306 }
307
308 return ret;
309}
310
206/* 311/*
207 * Write the superblock to the OSD 312 * Write the superblock to the OSD
208 */ 313 */
@@ -213,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait)
213 struct exofs_io_state *ios; 318 struct exofs_io_state *ios;
214 int ret = -ENOMEM; 319 int ret = -ENOMEM;
215 320
216 lock_super(sb); 321 fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
322 if (unlikely(!fscb))
323 return -ENOMEM;
324
217 sbi = sb->s_fs_info; 325 sbi = sb->s_fs_info;
218 fscb = &sbi->s_fscb;
219 326
327 /* NOTE: We no longer dirty the super_block anywhere in exofs. The
328 * reason we write the fscb here on unmount is so we can stay backwards
329 * compatible with fscb->s_version == 1. (What we are not compatible
330 * with is if a new version FS crashed and then we try to mount an old
331 * version). Otherwise the exofs_fscb is read-only from mkfs time. All
332 * the writeable info is set in exofs_sbi_write_stats() above.
333 */
220 ret = exofs_get_io_state(&sbi->layout, &ios); 334 ret = exofs_get_io_state(&sbi->layout, &ios);
221 if (ret) 335 if (unlikely(ret))
222 goto out; 336 goto out;
223 337
224 /* Note: We only write the changing part of the fscb. .i.e upto the 338 lock_super(sb);
225 * the fscb->s_dev_table_oid member. There is no read-modify-write 339
226 * here.
227 */
228 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid); 340 ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
229 memset(fscb, 0, ios->length); 341 memset(fscb, 0, ios->length);
230 fscb->s_nextid = cpu_to_le64(sbi->s_nextid); 342 fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -239,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait)
239 ios->cred = sbi->s_cred; 351 ios->cred = sbi->s_cred;
240 352
241 ret = exofs_sbi_write(ios); 353 ret = exofs_sbi_write(ios);
242 if (unlikely(ret)) { 354 if (unlikely(ret))
243 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__); 355 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
244 goto out; 356 else
245 } 357 sb->s_dirt = 0;
246 sb->s_dirt = 0; 358
247 359
360 unlock_super(sb);
248out: 361out:
249 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret); 362 EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
250 exofs_put_io_state(ios); 363 exofs_put_io_state(ios);
251 unlock_super(sb); 364 kfree(fscb);
252 return ret; 365 return ret;
253} 366}
254 367
@@ -292,13 +405,14 @@ static void exofs_put_super(struct super_block *sb)
292 int num_pend; 405 int num_pend;
293 struct exofs_sb_info *sbi = sb->s_fs_info; 406 struct exofs_sb_info *sbi = sb->s_fs_info;
294 407
295 if (sb->s_dirt)
296 exofs_write_super(sb);
297
298 /* make sure there are no pending commands */ 408 /* make sure there are no pending commands */
299 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0; 409 for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
300 num_pend = atomic_read(&sbi->s_curr_pending)) { 410 num_pend = atomic_read(&sbi->s_curr_pending)) {
301 wait_queue_head_t wq; 411 wait_queue_head_t wq;
412
413 printk(KERN_NOTICE "%s: !!Pending operations in flight. "
414 "This is a BUG. please report to osd-dev@open-osd.org\n",
415 __func__);
302 init_waitqueue_head(&wq); 416 init_waitqueue_head(&wq);
303 wait_event_timeout(wq, 417 wait_event_timeout(wq,
304 (atomic_read(&sbi->s_curr_pending) == 0), 418 (atomic_read(&sbi->s_curr_pending) == 0),
@@ -390,6 +504,23 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
390 return 0; 504 return 0;
391} 505}
392 506
507static unsigned __ra_pages(struct exofs_layout *layout)
508{
509 const unsigned _MIN_RA = 32; /* min 128K read-ahead */
510 unsigned ra_pages = layout->group_width * layout->stripe_unit /
511 PAGE_SIZE;
512 unsigned max_io_pages = exofs_max_io_pages(layout, ~0);
513
514 ra_pages *= 2; /* two stripes */
515 if (ra_pages < _MIN_RA)
516 ra_pages = roundup(_MIN_RA, ra_pages / 2);
517
518 if (ra_pages > max_io_pages)
519 ra_pages = max_io_pages;
520
521 return ra_pages;
522}
523
393/* @odi is valid only as long as @fscb_dev is valid */ 524/* @odi is valid only as long as @fscb_dev is valid */
394static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev, 525static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
395 struct osd_dev_info *odi) 526 struct osd_dev_info *odi)
@@ -495,7 +626,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
495 } 626 }
496 627
497 od = osduld_info_lookup(&odi); 628 od = osduld_info_lookup(&odi);
498 if (unlikely(IS_ERR(od))) { 629 if (IS_ERR(od)) {
499 ret = PTR_ERR(od); 630 ret = PTR_ERR(od);
500 EXOFS_ERR("ERROR: device requested is not found " 631 EXOFS_ERR("ERROR: device requested is not found "
501 "osd_name-%s =>%d\n", odi.osdname, ret); 632 "osd_name-%s =>%d\n", odi.osdname, ret);
@@ -558,9 +689,17 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
558 goto free_bdi; 689 goto free_bdi;
559 690
560 /* use mount options to fill superblock */ 691 /* use mount options to fill superblock */
561 od = osduld_path_lookup(opts->dev_name); 692 if (opts->is_osdname) {
693 struct osd_dev_info odi = {.systemid_len = 0};
694
695 odi.osdname_len = strlen(opts->dev_name);
696 odi.osdname = (u8 *)opts->dev_name;
697 od = osduld_info_lookup(&odi);
698 } else {
699 od = osduld_path_lookup(opts->dev_name);
700 }
562 if (IS_ERR(od)) { 701 if (IS_ERR(od)) {
563 ret = PTR_ERR(od); 702 ret = -EINVAL;
564 goto free_sbi; 703 goto free_sbi;
565 } 704 }
566 705
@@ -594,6 +733,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
594 goto free_sbi; 733 goto free_sbi;
595 734
596 sb->s_magic = le16_to_cpu(fscb.s_magic); 735 sb->s_magic = le16_to_cpu(fscb.s_magic);
736 /* NOTE: we read below to be backward compatible with old versions */
597 sbi->s_nextid = le64_to_cpu(fscb.s_nextid); 737 sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
598 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles); 738 sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
599 739
@@ -604,7 +744,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
604 ret = -EINVAL; 744 ret = -EINVAL;
605 goto free_sbi; 745 goto free_sbi;
606 } 746 }
607 if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) { 747 if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {
608 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n", 748 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
609 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version)); 749 EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
610 ret = -EINVAL; 750 ret = -EINVAL;
@@ -622,7 +762,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
622 goto free_sbi; 762 goto free_sbi;
623 } 763 }
624 764
765 __sbi_read_stats(sbi);
766
625 /* set up operation vectors */ 767 /* set up operation vectors */
768 sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
626 sb->s_bdi = &sbi->bdi; 769 sb->s_bdi = &sbi->bdi;
627 sb->s_fs_info = sbi; 770 sb->s_fs_info = sbi;
628 sb->s_op = &exofs_sops; 771 sb->s_op = &exofs_sops;
@@ -652,6 +795,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
652 795
653 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0], 796 _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
654 sbi->layout.s_pid); 797 sbi->layout.s_pid);
798 if (opts->is_osdname)
799 kfree(opts->dev_name);
655 return 0; 800 return 0;
656 801
657free_sbi: 802free_sbi:
@@ -660,6 +805,8 @@ free_bdi:
660 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n", 805 EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
661 opts->dev_name, sbi->layout.s_pid, ret); 806 opts->dev_name, sbi->layout.s_pid, ret);
662 exofs_free_sbi(sbi); 807 exofs_free_sbi(sbi);
808 if (opts->is_osdname)
809 kfree(opts->dev_name);
663 return ret; 810 return ret;
664} 811}
665 812
@@ -677,7 +824,8 @@ static struct dentry *exofs_mount(struct file_system_type *type,
677 if (ret) 824 if (ret)
678 return ERR_PTR(ret); 825 return ERR_PTR(ret);
679 826
680 opts.dev_name = dev_name; 827 if (!opts.dev_name)
828 opts.dev_name = dev_name;
681 return mount_nodev(type, flags, &opts, exofs_fill_super); 829 return mount_nodev(type, flags, &opts, exofs_fill_super);
682} 830}
683 831
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 4b6825740dd5..b05acb796135 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
320 struct inode * inode = dentry->d_inode; 320 struct inode * inode = dentry->d_inode;
321 int len = *max_len; 321 int len = *max_len;
322 int type = FILEID_INO32_GEN; 322 int type = FILEID_INO32_GEN;
323 323
324 if (len < 2 || (connectable && len < 4)) 324 if (connectable && (len < 4)) {
325 *max_len = 4;
326 return 255;
327 } else if (len < 2) {
328 *max_len = 2;
325 return 255; 329 return 255;
330 }
326 331
327 len = 2; 332 len = 2;
328 fid->i32.ino = inode->i_ino; 333 fid->i32.ino = inode->i_ino;
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
369 /* 374 /*
370 * Try to get any dentry for the given file handle from the filesystem. 375 * Try to get any dentry for the given file handle from the filesystem.
371 */ 376 */
377 if (!nop || !nop->fh_to_dentry)
378 return ERR_PTR(-ESTALE);
372 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 379 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
373 if (!result) 380 if (!result)
374 result = ERR_PTR(-ESTALE); 381 result = ERR_PTR(-ESTALE);
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 7b4180554a62..abea5a17c764 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -406,7 +406,7 @@ ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
406 return -EINVAL; 406 return -EINVAL;
407 if (!test_opt(dentry->d_sb, POSIX_ACL)) 407 if (!test_opt(dentry->d_sb, POSIX_ACL))
408 return -EOPNOTSUPP; 408 return -EOPNOTSUPP;
409 if (!is_owner_or_cap(dentry->d_inode)) 409 if (!inode_owner_or_capable(dentry->d_inode))
410 return -EPERM; 410 return -EPERM;
411 411
412 if (value) { 412 if (value) {
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 6346a2acf326..645be9e7ee47 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -110,7 +110,7 @@ extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int); 110extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
111 111
112/* ialloc.c */ 112/* ialloc.c */
113extern struct inode * ext2_new_inode (struct inode *, int); 113extern struct inode * ext2_new_inode (struct inode *, int, const struct qstr *);
114extern void ext2_free_inode (struct inode *); 114extern void ext2_free_inode (struct inode *);
115extern unsigned long ext2_count_free_inodes (struct super_block *); 115extern unsigned long ext2_count_free_inodes (struct super_block *);
116extern void ext2_check_inodes_bitmap (struct super_block *); 116extern void ext2_check_inodes_bitmap (struct super_block *);
@@ -174,3 +174,9 @@ ext2_group_first_block_no(struct super_block *sb, unsigned long group_no)
174 return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) + 174 return group_no * (ext2_fsblk_t)EXT2_BLOCKS_PER_GROUP(sb) +
175 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block); 175 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block);
176} 176}
177
178#define ext2_set_bit __test_and_set_bit_le
179#define ext2_clear_bit __test_and_clear_bit_le
180#define ext2_test_bit test_bit_le
181#define ext2_find_first_zero_bit find_first_zero_bit_le
182#define ext2_find_next_zero_bit find_next_zero_bit_le
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index ad70479aabff..ee9ed31948e1 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -429,7 +429,8 @@ found:
429 return group; 429 return group;
430} 430}
431 431
432struct inode *ext2_new_inode(struct inode *dir, int mode) 432struct inode *ext2_new_inode(struct inode *dir, int mode,
433 const struct qstr *qstr)
433{ 434{
434 struct super_block *sb; 435 struct super_block *sb;
435 struct buffer_head *bitmap_bh = NULL; 436 struct buffer_head *bitmap_bh = NULL;
@@ -585,7 +586,7 @@ got:
585 if (err) 586 if (err)
586 goto fail_free_drop; 587 goto fail_free_drop;
587 588
588 err = ext2_init_security(inode,dir); 589 err = ext2_init_security(inode, dir, qstr);
589 if (err) 590 if (err)
590 goto fail_free_drop; 591 goto fail_free_drop;
591 592
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index e7431309bdca..f81e250ac5c4 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -39,7 +39,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
39 if (ret) 39 if (ret)
40 return ret; 40 return ret;
41 41
42 if (!is_owner_or_cap(inode)) { 42 if (!inode_owner_or_capable(inode)) {
43 ret = -EACCES; 43 ret = -EACCES;
44 goto setflags_out; 44 goto setflags_out;
45 } 45 }
@@ -89,7 +89,7 @@ setflags_out:
89 case EXT2_IOC_GETVERSION: 89 case EXT2_IOC_GETVERSION:
90 return put_user(inode->i_generation, (int __user *) arg); 90 return put_user(inode->i_generation, (int __user *) arg);
91 case EXT2_IOC_SETVERSION: 91 case EXT2_IOC_SETVERSION:
92 if (!is_owner_or_cap(inode)) 92 if (!inode_owner_or_capable(inode))
93 return -EPERM; 93 return -EPERM;
94 ret = mnt_want_write(filp->f_path.mnt); 94 ret = mnt_want_write(filp->f_path.mnt);
95 if (ret) 95 if (ret)
@@ -115,7 +115,7 @@ setflags_out:
115 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) 115 if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode))
116 return -ENOTTY; 116 return -ENOTTY;
117 117
118 if (!is_owner_or_cap(inode)) 118 if (!inode_owner_or_capable(inode))
119 return -EACCES; 119 return -EACCES;
120 120
121 if (get_user(rsv_window_size, (int __user *)arg)) 121 if (get_user(rsv_window_size, (int __user *)arg))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 2e1d8341d827..ed5c5d496ee9 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -104,7 +104,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
104 104
105 dquot_initialize(dir); 105 dquot_initialize(dir);
106 106
107 inode = ext2_new_inode(dir, mode); 107 inode = ext2_new_inode(dir, mode, &dentry->d_name);
108 if (IS_ERR(inode)) 108 if (IS_ERR(inode))
109 return PTR_ERR(inode); 109 return PTR_ERR(inode);
110 110
@@ -133,7 +133,7 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_
133 133
134 dquot_initialize(dir); 134 dquot_initialize(dir);
135 135
136 inode = ext2_new_inode (dir, mode); 136 inode = ext2_new_inode (dir, mode, &dentry->d_name);
137 err = PTR_ERR(inode); 137 err = PTR_ERR(inode);
138 if (!IS_ERR(inode)) { 138 if (!IS_ERR(inode)) {
139 init_special_inode(inode, inode->i_mode, rdev); 139 init_special_inode(inode, inode->i_mode, rdev);
@@ -159,7 +159,7 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry,
159 159
160 dquot_initialize(dir); 160 dquot_initialize(dir);
161 161
162 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO); 162 inode = ext2_new_inode (dir, S_IFLNK | S_IRWXUGO, &dentry->d_name);
163 err = PTR_ERR(inode); 163 err = PTR_ERR(inode);
164 if (IS_ERR(inode)) 164 if (IS_ERR(inode))
165 goto out; 165 goto out;
@@ -230,7 +230,7 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode)
230 230
231 inode_inc_link_count(dir); 231 inode_inc_link_count(dir);
232 232
233 inode = ext2_new_inode (dir, S_IFDIR | mode); 233 inode = ext2_new_inode(dir, S_IFDIR | mode, &dentry->d_name);
234 err = PTR_ERR(inode); 234 err = PTR_ERR(inode);
235 if (IS_ERR(inode)) 235 if (IS_ERR(inode))
236 goto out_dir; 236 goto out_dir;
@@ -344,7 +344,6 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); 344 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
345 if (!new_de) 345 if (!new_de)
346 goto out_dir; 346 goto out_dir;
347 inode_inc_link_count(old_inode);
348 ext2_set_link(new_dir, new_de, new_page, old_inode, 1); 347 ext2_set_link(new_dir, new_de, new_page, old_inode, 1);
349 new_inode->i_ctime = CURRENT_TIME_SEC; 348 new_inode->i_ctime = CURRENT_TIME_SEC;
350 if (dir_de) 349 if (dir_de)
@@ -356,12 +355,9 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
356 if (new_dir->i_nlink >= EXT2_LINK_MAX) 355 if (new_dir->i_nlink >= EXT2_LINK_MAX)
357 goto out_dir; 356 goto out_dir;
358 } 357 }
359 inode_inc_link_count(old_inode);
360 err = ext2_add_link(new_dentry, old_inode); 358 err = ext2_add_link(new_dentry, old_inode);
361 if (err) { 359 if (err)
362 inode_dec_link_count(old_inode);
363 goto out_dir; 360 goto out_dir;
364 }
365 if (dir_de) 361 if (dir_de)
366 inode_inc_link_count(new_dir); 362 inode_inc_link_count(new_dir);
367 } 363 }
@@ -369,12 +365,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
369 /* 365 /*
370 * Like most other Unix systems, set the ctime for inodes on a 366 * Like most other Unix systems, set the ctime for inodes on a
371 * rename. 367 * rename.
372 * inode_dec_link_count() will mark the inode dirty.
373 */ 368 */
374 old_inode->i_ctime = CURRENT_TIME_SEC; 369 old_inode->i_ctime = CURRENT_TIME_SEC;
370 mark_inode_dirty(old_inode);
375 371
376 ext2_delete_entry (old_de, old_page); 372 ext2_delete_entry (old_de, old_page);
377 inode_dec_link_count(old_inode);
378 373
379 if (dir_de) { 374 if (dir_de) {
380 if (old_dir != new_dir) 375 if (old_dir != new_dir)
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index a1a1c2184616..5e41cccff762 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -116,9 +116,11 @@ exit_ext2_xattr(void)
116# endif /* CONFIG_EXT2_FS_XATTR */ 116# endif /* CONFIG_EXT2_FS_XATTR */
117 117
118#ifdef CONFIG_EXT2_FS_SECURITY 118#ifdef CONFIG_EXT2_FS_SECURITY
119extern int ext2_init_security(struct inode *inode, struct inode *dir); 119extern int ext2_init_security(struct inode *inode, struct inode *dir,
120 const struct qstr *qstr);
120#else 121#else
121static inline int ext2_init_security(struct inode *inode, struct inode *dir) 122static inline int ext2_init_security(struct inode *inode, struct inode *dir,
123 const struct qstr *qstr)
122{ 124{
123 return 0; 125 return 0;
124} 126}
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 3004e15d5da5..5d979b4347b0 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -47,14 +47,15 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
47} 47}
48 48
49int 49int
50ext2_init_security(struct inode *inode, struct inode *dir) 50ext2_init_security(struct inode *inode, struct inode *dir,
51 const struct qstr *qstr)
51{ 52{
52 int err; 53 int err;
53 size_t len; 54 size_t len;
54 void *value; 55 void *value;
55 char *name; 56 char *name;
56 57
57 err = security_inode_init_security(inode, dir, &name, &value, &len); 58 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
58 if (err) { 59 if (err) {
59 if (err == -EOPNOTSUPP) 60 if (err == -EOPNOTSUPP)
60 return 0; 61 return 0;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e4fa49e6c539..9d021c0d472a 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -435,7 +435,7 @@ ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
435 return -EINVAL; 435 return -EINVAL;
436 if (!test_opt(inode->i_sb, POSIX_ACL)) 436 if (!test_opt(inode->i_sb, POSIX_ACL))
437 return -EOPNOTSUPP; 437 return -EOPNOTSUPP;
438 if (!is_owner_or_cap(inode)) 438 if (!inode_owner_or_capable(inode))
439 return -EPERM; 439 return -EPERM;
440 440
441 if (value) { 441 if (value) {
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 045995c8ce5a..153242187fce 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1991,6 +1991,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
1991 spin_unlock(sb_bgl_lock(sbi, group)); 1991 spin_unlock(sb_bgl_lock(sbi, group));
1992 percpu_counter_sub(&sbi->s_freeblocks_counter, next - start); 1992 percpu_counter_sub(&sbi->s_freeblocks_counter, next - start);
1993 1993
1994 free_blocks -= next - start;
1994 /* Do not issue a TRIM on extents smaller than minblocks */ 1995 /* Do not issue a TRIM on extents smaller than minblocks */
1995 if ((next - start) < minblocks) 1996 if ((next - start) < minblocks)
1996 goto free_extent; 1997 goto free_extent;
@@ -2040,7 +2041,7 @@ free_extent:
2040 cond_resched(); 2041 cond_resched();
2041 2042
2042 /* No more suitable extents */ 2043 /* No more suitable extents */
2043 if ((free_blocks - count) < minblocks) 2044 if (free_blocks < minblocks)
2044 break; 2045 break;
2045 } 2046 }
2046 2047
@@ -2090,7 +2091,8 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2090 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); 2091 ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count);
2091 int ret = 0; 2092 int ret = 0;
2092 2093
2093 start = range->start >> sb->s_blocksize_bits; 2094 start = (range->start >> sb->s_blocksize_bits) +
2095 le32_to_cpu(es->s_first_data_block);
2094 len = range->len >> sb->s_blocksize_bits; 2096 len = range->len >> sb->s_blocksize_bits;
2095 minlen = range->minlen >> sb->s_blocksize_bits; 2097 minlen = range->minlen >> sb->s_blocksize_bits;
2096 trimmed = 0; 2098 trimmed = 0;
@@ -2099,10 +2101,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2099 return -EINVAL; 2101 return -EINVAL;
2100 if (start >= max_blks) 2102 if (start >= max_blks)
2101 goto out; 2103 goto out;
2102 if (start < le32_to_cpu(es->s_first_data_block)) {
2103 len -= le32_to_cpu(es->s_first_data_block) - start;
2104 start = le32_to_cpu(es->s_first_data_block);
2105 }
2106 if (start + len > max_blks) 2104 if (start + len > max_blks)
2107 len = max_blks - start; 2105 len = max_blks - start;
2108 2106
@@ -2129,10 +2127,15 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
2129 if (free_blocks < minlen) 2127 if (free_blocks < minlen)
2130 continue; 2128 continue;
2131 2129
2132 if (len >= EXT3_BLOCKS_PER_GROUP(sb)) 2130 /*
2133 len -= (EXT3_BLOCKS_PER_GROUP(sb) - first_block); 2131 * For all the groups except the last one, last block will
2134 else 2132 * always be EXT3_BLOCKS_PER_GROUP(sb), so we only need to
2133 * change it for the last group in which case first_block +
2134 * len < EXT3_BLOCKS_PER_GROUP(sb).
2135 */
2136 if (first_block + len < EXT3_BLOCKS_PER_GROUP(sb))
2135 last_block = first_block + len; 2137 last_block = first_block + len;
2138 len -= last_block - first_block;
2136 2139
2137 ret = ext3_trim_all_free(sb, group, first_block, 2140 ret = ext3_trim_all_free(sb, group, first_block,
2138 last_block, minlen); 2141 last_block, minlen);
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 9724aef22460..bfc2dc43681d 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -404,7 +404,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
404 * For other inodes, search forward from the parent directory's block 404 * For other inodes, search forward from the parent directory's block
405 * group to find a free inode. 405 * group to find a free inode.
406 */ 406 */
407struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) 407struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
408 const struct qstr *qstr, int mode)
408{ 409{
409 struct super_block *sb; 410 struct super_block *sb;
410 struct buffer_head *bitmap_bh = NULL; 411 struct buffer_head *bitmap_bh = NULL;
@@ -589,7 +590,7 @@ got:
589 if (err) 590 if (err)
590 goto fail_free_drop; 591 goto fail_free_drop;
591 592
592 err = ext3_init_security(handle,inode, dir); 593 err = ext3_init_security(handle, inode, dir, qstr);
593 if (err) 594 if (err)
594 goto fail_free_drop; 595 goto fail_free_drop;
595 596
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index fc080dd561f7..f4090bd2f345 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -38,7 +38,7 @@ long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
38 unsigned int oldflags; 38 unsigned int oldflags;
39 unsigned int jflag; 39 unsigned int jflag;
40 40
41 if (!is_owner_or_cap(inode)) 41 if (!inode_owner_or_capable(inode))
42 return -EACCES; 42 return -EACCES;
43 43
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
@@ -123,7 +123,7 @@ flags_out:
123 __u32 generation; 123 __u32 generation;
124 int err; 124 int err;
125 125
126 if (!is_owner_or_cap(inode)) 126 if (!inode_owner_or_capable(inode))
127 return -EPERM; 127 return -EPERM;
128 128
129 err = mnt_want_write(filp->f_path.mnt); 129 err = mnt_want_write(filp->f_path.mnt);
@@ -192,7 +192,7 @@ setversion_out:
192 if (err) 192 if (err)
193 return err; 193 return err;
194 194
195 if (!is_owner_or_cap(inode)) { 195 if (!inode_owner_or_capable(inode)) {
196 err = -EACCES; 196 err = -EACCES;
197 goto setrsvsz_out; 197 goto setrsvsz_out;
198 } 198 }
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index b27ba71810ec..32f3b8695859 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1540,8 +1540,8 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1540 goto cleanup; 1540 goto cleanup;
1541 node2 = (struct dx_node *)(bh2->b_data); 1541 node2 = (struct dx_node *)(bh2->b_data);
1542 entries2 = node2->entries; 1542 entries2 = node2->entries;
1543 memset(&node2->fake, 0, sizeof(struct fake_dirent));
1543 node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize); 1544 node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize);
1544 node2->fake.inode = 0;
1545 BUFFER_TRACE(frame->bh, "get_write_access"); 1545 BUFFER_TRACE(frame->bh, "get_write_access");
1546 err = ext3_journal_get_write_access(handle, frame->bh); 1546 err = ext3_journal_get_write_access(handle, frame->bh);
1547 if (err) 1547 if (err)
@@ -1710,7 +1710,7 @@ retry:
1710 if (IS_DIRSYNC(dir)) 1710 if (IS_DIRSYNC(dir))
1711 handle->h_sync = 1; 1711 handle->h_sync = 1;
1712 1712
1713 inode = ext3_new_inode (handle, dir, mode); 1713 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1714 err = PTR_ERR(inode); 1714 err = PTR_ERR(inode);
1715 if (!IS_ERR(inode)) { 1715 if (!IS_ERR(inode)) {
1716 inode->i_op = &ext3_file_inode_operations; 1716 inode->i_op = &ext3_file_inode_operations;
@@ -1746,7 +1746,7 @@ retry:
1746 if (IS_DIRSYNC(dir)) 1746 if (IS_DIRSYNC(dir))
1747 handle->h_sync = 1; 1747 handle->h_sync = 1;
1748 1748
1749 inode = ext3_new_inode (handle, dir, mode); 1749 inode = ext3_new_inode (handle, dir, &dentry->d_name, mode);
1750 err = PTR_ERR(inode); 1750 err = PTR_ERR(inode);
1751 if (!IS_ERR(inode)) { 1751 if (!IS_ERR(inode)) {
1752 init_special_inode(inode, inode->i_mode, rdev); 1752 init_special_inode(inode, inode->i_mode, rdev);
@@ -1784,7 +1784,7 @@ retry:
1784 if (IS_DIRSYNC(dir)) 1784 if (IS_DIRSYNC(dir))
1785 handle->h_sync = 1; 1785 handle->h_sync = 1;
1786 1786
1787 inode = ext3_new_inode (handle, dir, S_IFDIR | mode); 1787 inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode);
1788 err = PTR_ERR(inode); 1788 err = PTR_ERR(inode);
1789 if (IS_ERR(inode)) 1789 if (IS_ERR(inode))
1790 goto out_stop; 1790 goto out_stop;
@@ -2206,7 +2206,7 @@ retry:
2206 if (IS_DIRSYNC(dir)) 2206 if (IS_DIRSYNC(dir))
2207 handle->h_sync = 1; 2207 handle->h_sync = 1;
2208 2208
2209 inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); 2209 inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFLNK|S_IRWXUGO);
2210 err = PTR_ERR(inode); 2210 err = PTR_ERR(inode);
2211 if (IS_ERR(inode)) 2211 if (IS_ERR(inode))
2212 goto out_stop; 2212 goto out_stop;
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry,
2253 2253
2254 dquot_initialize(dir); 2254 dquot_initialize(dir);
2255 2255
2256 /*
2257 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2258 * otherwise has the potential to corrupt the orphan inode list.
2259 */
2260 if (inode->i_nlink == 0)
2261 return -ENOENT;
2262
2263retry: 2256retry:
2264 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + 2257 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2265 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2258 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 85c8cc8f2473..071689f86e18 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1464,6 +1464,13 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1464 return; 1464 return;
1465 } 1465 }
1466 1466
1467 /* Check if feature set allows readwrite operations */
1468 if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) {
1469 ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
1470 "unknown ROCOMPAT features");
1471 return;
1472 }
1473
1467 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { 1474 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1468 if (es->s_last_orphan) 1475 if (es->s_last_orphan)
1469 jbd_debug(1, "Errors on filesystem, " 1476 jbd_debug(1, "Errors on filesystem, "
@@ -1936,6 +1943,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1936 sb->s_qcop = &ext3_qctl_operations; 1943 sb->s_qcop = &ext3_qctl_operations;
1937 sb->dq_op = &ext3_quota_operations; 1944 sb->dq_op = &ext3_quota_operations;
1938#endif 1945#endif
1946 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
1939 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 1947 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
1940 mutex_init(&sbi->s_orphan_lock); 1948 mutex_init(&sbi->s_orphan_lock);
1941 mutex_init(&sbi->s_resize_lock); 1949 mutex_init(&sbi->s_resize_lock);
diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h
index 377fe7201169..2be4f69bfa64 100644
--- a/fs/ext3/xattr.h
+++ b/fs/ext3/xattr.h
@@ -128,10 +128,10 @@ exit_ext3_xattr(void)
128 128
129#ifdef CONFIG_EXT3_FS_SECURITY 129#ifdef CONFIG_EXT3_FS_SECURITY
130extern int ext3_init_security(handle_t *handle, struct inode *inode, 130extern int ext3_init_security(handle_t *handle, struct inode *inode,
131 struct inode *dir); 131 struct inode *dir, const struct qstr *qstr);
132#else 132#else
133static inline int ext3_init_security(handle_t *handle, struct inode *inode, 133static inline int ext3_init_security(handle_t *handle, struct inode *inode,
134 struct inode *dir) 134 struct inode *dir, const struct qstr *qstr)
135{ 135{
136 return 0; 136 return 0;
137} 137}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 03a99bfc59f9..b8d9f83aa5c5 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -49,14 +49,15 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
49} 49}
50 50
51int 51int
52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) 52ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
53 const struct qstr *qstr)
53{ 54{
54 int err; 55 int err;
55 size_t len; 56 size_t len;
56 void *value; 57 void *value;
57 char *name; 58 char *name;
58 59
59 err = security_inode_init_security(inode, dir, &name, &value, &len); 60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
60 if (err) { 61 if (err) {
61 if (err == -EOPNOTSUPP) 62 if (err == -EOPNOTSUPP)
62 return 0; 63 return 0;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index e0270d1f8d82..21eacd7b7d79 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -433,7 +433,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
433 return -EINVAL; 433 return -EINVAL;
434 if (!test_opt(inode->i_sb, POSIX_ACL)) 434 if (!test_opt(inode->i_sb, POSIX_ACL))
435 return -EOPNOTSUPP; 435 return -EOPNOTSUPP;
436 if (!is_owner_or_cap(inode)) 436 if (!inode_owner_or_capable(inode))
437 return -EPERM; 437 return -EPERM;
438 438
439 if (value) { 439 if (value) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3aa0b72b3b94..4daaf2b753f4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -923,14 +923,14 @@ struct ext4_inode_info {
923#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 923#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
924 EXT4_MOUNT2_##opt) 924 EXT4_MOUNT2_##opt)
925 925
926#define ext4_set_bit ext2_set_bit 926#define ext4_set_bit __test_and_set_bit_le
927#define ext4_set_bit_atomic ext2_set_bit_atomic 927#define ext4_set_bit_atomic ext2_set_bit_atomic
928#define ext4_clear_bit ext2_clear_bit 928#define ext4_clear_bit __test_and_clear_bit_le
929#define ext4_clear_bit_atomic ext2_clear_bit_atomic 929#define ext4_clear_bit_atomic ext2_clear_bit_atomic
930#define ext4_test_bit ext2_test_bit 930#define ext4_test_bit test_bit_le
931#define ext4_find_first_zero_bit ext2_find_first_zero_bit 931#define ext4_find_first_zero_bit find_first_zero_bit_le
932#define ext4_find_next_zero_bit ext2_find_next_zero_bit 932#define ext4_find_next_zero_bit find_next_zero_bit_le
933#define ext4_find_next_bit ext2_find_next_bit 933#define ext4_find_next_bit find_next_bit_le
934 934
935/* 935/*
936 * Maximal mount counts between two filesystem checks 936 * Maximal mount counts between two filesystem checks
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ccce8a7e94ed..7516fb9c0bd5 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -131,7 +131,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
131 * fragmenting the file system's free space. Maybe we 131 * fragmenting the file system's free space. Maybe we
132 * should have some hueristics or some way to allow 132 * should have some hueristics or some way to allow
133 * userspace to pass a hint to file system, 133 * userspace to pass a hint to file system,
134 * especiially if the latter case turns out to be 134 * especially if the latter case turns out to be
135 * common. 135 * common.
136 */ 136 */
137 ex = path[depth].p_ext; 137 ex = path[depth].p_ext;
@@ -2844,7 +2844,7 @@ fix_extent_len:
2844 * ext4_get_blocks_dio_write() when DIO to write 2844 * ext4_get_blocks_dio_write() when DIO to write
2845 * to an uninitialized extent. 2845 * to an uninitialized extent.
2846 * 2846 *
2847 * Writing to an uninitized extent may result in splitting the uninitialized 2847 * Writing to an uninitialized extent may result in splitting the uninitialized
2848 * extent into multiple /initialized uninitialized extents (up to three) 2848 * extent into multiple /initialized uninitialized extents (up to three)
2849 * There are three possibilities: 2849 * There are three possibilities:
2850 * a> There is no split required: Entire extent should be uninitialized 2850 * a> There is no split required: Entire extent should be uninitialized
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index eb9097aec6f0..78b79e1bd7ed 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1042,7 +1042,7 @@ got:
1042 if (err) 1042 if (err)
1043 goto fail_free_drop; 1043 goto fail_free_drop;
1044 1044
1045 err = ext4_init_security(handle, inode, dir); 1045 err = ext4_init_security(handle, inode, dir, qstr);
1046 if (err) 1046 if (err)
1047 goto fail_free_drop; 1047 goto fail_free_drop;
1048 1048
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index eb3bc2fe647e..a84faa110bcd 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
38 unsigned int oldflags; 38 unsigned int oldflags;
39 unsigned int jflag; 39 unsigned int jflag;
40 40
41 if (!is_owner_or_cap(inode)) 41 if (!inode_owner_or_capable(inode))
42 return -EACCES; 42 return -EACCES;
43 43
44 if (get_user(flags, (int __user *) arg)) 44 if (get_user(flags, (int __user *) arg))
@@ -146,7 +146,7 @@ flags_out:
146 __u32 generation; 146 __u32 generation;
147 int err; 147 int err;
148 148
149 if (!is_owner_or_cap(inode)) 149 if (!inode_owner_or_capable(inode))
150 return -EPERM; 150 return -EPERM;
151 151
152 err = mnt_want_write(filp->f_path.mnt); 152 err = mnt_want_write(filp->f_path.mnt);
@@ -298,7 +298,7 @@ mext_out:
298 case EXT4_IOC_MIGRATE: 298 case EXT4_IOC_MIGRATE:
299 { 299 {
300 int err; 300 int err;
301 if (!is_owner_or_cap(inode)) 301 if (!inode_owner_or_capable(inode))
302 return -EACCES; 302 return -EACCES;
303 303
304 err = mnt_want_write(filp->f_path.mnt); 304 err = mnt_want_write(filp->f_path.mnt);
@@ -320,7 +320,7 @@ mext_out:
320 case EXT4_IOC_ALLOC_DA_BLKS: 320 case EXT4_IOC_ALLOC_DA_BLKS:
321 { 321 {
322 int err; 322 int err;
323 if (!is_owner_or_cap(inode)) 323 if (!inode_owner_or_capable(inode))
324 return -EACCES; 324 return -EACCES;
325 325
326 err = mnt_want_write(filp->f_path.mnt); 326 err = mnt_want_write(filp->f_path.mnt);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5485390d32c5..e781b7ea5630 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry,
2304 2304
2305 dquot_initialize(dir); 2305 dquot_initialize(dir);
2306 2306
2307 /*
2308 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
2309 * otherwise has the potential to corrupt the orphan inode list.
2310 */
2311 if (inode->i_nlink == 0)
2312 return -ENOENT;
2313
2314retry: 2307retry:
2315 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2308 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2316 EXT4_INDEX_EXTRA_TRANS_BLOCKS); 2309 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f6a318f836b2..203f9e4a70be 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3415 sb->s_qcop = &ext4_qctl_operations; 3415 sb->s_qcop = &ext4_qctl_operations;
3416 sb->dq_op = &ext4_quota_operations; 3416 sb->dq_op = &ext4_quota_operations;
3417#endif 3417#endif
3418 memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
3419
3418 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ 3420 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
3419 mutex_init(&sbi->s_orphan_lock); 3421 mutex_init(&sbi->s_orphan_lock);
3420 mutex_init(&sbi->s_resize_lock); 3422 mutex_init(&sbi->s_resize_lock);
@@ -3509,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3509 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); 3511 percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
3510 3512
3511no_journal: 3513no_journal:
3512 EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); 3514 /*
3515 * The maximum number of concurrent works can be high and
3516 * concurrency isn't really necessary. Limit it to 1.
3517 */
3518 EXT4_SB(sb)->dio_unwritten_wq =
3519 alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1);
3513 if (!EXT4_SB(sb)->dio_unwritten_wq) { 3520 if (!EXT4_SB(sb)->dio_unwritten_wq) {
3514 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); 3521 printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
3515 goto failed_mount_wq; 3522 goto failed_mount_wq;
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 1ef16520b950..25b7387ff183 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -145,10 +145,10 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
145 145
146#ifdef CONFIG_EXT4_FS_SECURITY 146#ifdef CONFIG_EXT4_FS_SECURITY
147extern int ext4_init_security(handle_t *handle, struct inode *inode, 147extern int ext4_init_security(handle_t *handle, struct inode *inode,
148 struct inode *dir); 148 struct inode *dir, const struct qstr *qstr);
149#else 149#else
150static inline int ext4_init_security(handle_t *handle, struct inode *inode, 150static inline int ext4_init_security(handle_t *handle, struct inode *inode,
151 struct inode *dir) 151 struct inode *dir, const struct qstr *qstr)
152{ 152{
153 return 0; 153 return 0;
154} 154}
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index 9b21268e121c..007c3bfbf094 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -49,14 +49,15 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
49} 49}
50 50
51int 51int
52ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) 52ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
53 const struct qstr *qstr)
53{ 54{
54 int err; 55 int err;
55 size_t len; 56 size_t len;
56 void *value; 57 void *value;
57 char *name; 58 char *name;
58 59
59 err = security_inode_init_security(inode, dir, &name, &value, &len); 60 err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
60 if (err) { 61 if (err) {
61 if (err == -EOPNOTSUPP) 62 if (err == -EOPNOTSUPP)
62 return 0; 63 return 0;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 86753fe10bd1..0e277ec4b612 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
757 struct inode *inode = de->d_inode; 757 struct inode *inode = de->d_inode;
758 u32 ipos_h, ipos_m, ipos_l; 758 u32 ipos_h, ipos_m, ipos_l;
759 759
760 if (len < 5) 760 if (len < 5) {
761 *lenp = 5;
761 return 255; /* no room */ 762 return 255; /* no room */
763 }
762 764
763 ipos_h = MSDOS_I(inode)->i_pos >> 8; 765 ipos_h = MSDOS_I(inode)->i_pos >> 8;
764 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; 766 ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24;
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index f88f752babd9..adae3fb7451a 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -43,7 +43,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
43 43
44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) 44static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
45{ 45{
46 if (nd->flags & LOOKUP_RCU) 46 if (nd && nd->flags & LOOKUP_RCU)
47 return -ECHILD; 47 return -ECHILD;
48 48
49 /* This is not negative dentry. Always valid. */ 49 /* This is not negative dentry. Always valid. */
@@ -54,7 +54,7 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd)
54 54
55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) 55static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
56{ 56{
57 if (nd->flags & LOOKUP_RCU) 57 if (nd && nd->flags & LOOKUP_RCU)
58 return -ECHILD; 58 return -ECHILD;
59 59
60 /* 60 /*
diff --git a/fs/fcntl.c b/fs/fcntl.c
index cb1026181bdc..22764c7c8382 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
131SYSCALL_DEFINE1(dup, unsigned int, fildes) 131SYSCALL_DEFINE1(dup, unsigned int, fildes)
132{ 132{
133 int ret = -EBADF; 133 int ret = -EBADF;
134 struct file *file = fget(fildes); 134 struct file *file = fget_raw(fildes);
135 135
136 if (file) { 136 if (file) {
137 ret = get_unused_fd(); 137 ret = get_unused_fd();
@@ -159,7 +159,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
159 159
160 /* O_NOATIME can only be set by the owner or superuser */ 160 /* O_NOATIME can only be set by the owner or superuser */
161 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME)) 161 if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
162 if (!is_owner_or_cap(inode)) 162 if (!inode_owner_or_capable(inode))
163 return -EPERM; 163 return -EPERM;
164 164
165 /* required for strict SunOS emulation */ 165 /* required for strict SunOS emulation */
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
426 return err; 426 return err;
427} 427}
428 428
429static int check_fcntl_cmd(unsigned cmd)
430{
431 switch (cmd) {
432 case F_DUPFD:
433 case F_DUPFD_CLOEXEC:
434 case F_GETFD:
435 case F_SETFD:
436 case F_GETFL:
437 return 1;
438 }
439 return 0;
440}
441
429SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) 442SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
430{ 443{
431 struct file *filp; 444 struct file *filp;
432 long err = -EBADF; 445 long err = -EBADF;
433 446
434 filp = fget(fd); 447 filp = fget_raw(fd);
435 if (!filp) 448 if (!filp)
436 goto out; 449 goto out;
437 450
451 if (unlikely(filp->f_mode & FMODE_PATH)) {
452 if (!check_fcntl_cmd(cmd)) {
453 fput(filp);
454 goto out;
455 }
456 }
457
438 err = security_file_fcntl(filp, cmd, arg); 458 err = security_file_fcntl(filp, cmd, arg);
439 if (err) { 459 if (err) {
440 fput(filp); 460 fput(filp);
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
456 long err; 476 long err;
457 477
458 err = -EBADF; 478 err = -EBADF;
459 filp = fget(fd); 479 filp = fget_raw(fd);
460 if (!filp) 480 if (!filp)
461 goto out; 481 goto out;
462 482
483 if (unlikely(filp->f_mode & FMODE_PATH)) {
484 if (!check_fcntl_cmd(cmd)) {
485 fput(filp);
486 goto out;
487 }
488 }
489
463 err = security_file_fcntl(filp, cmd, arg); 490 err = security_file_fcntl(filp, cmd, arg);
464 if (err) { 491 if (err) {
465 fput(filp); 492 fput(filp);
@@ -808,14 +835,14 @@ static int __init fcntl_init(void)
808 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY 835 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
809 * is defined as O_NONBLOCK on some platforms and not on others. 836 * is defined as O_NONBLOCK on some platforms and not on others.
810 */ 837 */
811 BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( 838 BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
812 O_RDONLY | O_WRONLY | O_RDWR | 839 O_RDONLY | O_WRONLY | O_RDWR |
813 O_CREAT | O_EXCL | O_NOCTTY | 840 O_CREAT | O_EXCL | O_NOCTTY |
814 O_TRUNC | O_APPEND | /* O_NONBLOCK | */ 841 O_TRUNC | O_APPEND | /* O_NONBLOCK | */
815 __O_SYNC | O_DSYNC | FASYNC | 842 __O_SYNC | O_DSYNC | FASYNC |
816 O_DIRECT | O_LARGEFILE | O_DIRECTORY | 843 O_DIRECT | O_LARGEFILE | O_DIRECTORY |
817 O_NOFOLLOW | O_NOATIME | O_CLOEXEC | 844 O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
818 __FMODE_EXEC 845 __FMODE_EXEC | O_PATH
819 )); 846 ));
820 847
821 fasync_cache = kmem_cache_create("fasync_cache", 848 fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fhandle.c b/fs/fhandle.c
new file mode 100644
index 000000000000..bf93ad2bee07
--- /dev/null
+++ b/fs/fhandle.c
@@ -0,0 +1,265 @@
1#include <linux/syscalls.h>
2#include <linux/slab.h>
3#include <linux/fs.h>
4#include <linux/file.h>
5#include <linux/mount.h>
6#include <linux/namei.h>
7#include <linux/exportfs.h>
8#include <linux/fs_struct.h>
9#include <linux/fsnotify.h>
10#include <asm/uaccess.h>
11#include "internal.h"
12
13static long do_sys_name_to_handle(struct path *path,
14 struct file_handle __user *ufh,
15 int __user *mnt_id)
16{
17 long retval;
18 struct file_handle f_handle;
19 int handle_dwords, handle_bytes;
20 struct file_handle *handle = NULL;
21
22 /*
23 * We need to make sure whether the file system
24 * supports decoding of the file handle
25 */
26 if (!path->mnt->mnt_sb->s_export_op ||
27 !path->mnt->mnt_sb->s_export_op->fh_to_dentry)
28 return -EOPNOTSUPP;
29
30 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle)))
31 return -EFAULT;
32
33 if (f_handle.handle_bytes > MAX_HANDLE_SZ)
34 return -EINVAL;
35
36 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
37 GFP_KERNEL);
38 if (!handle)
39 return -ENOMEM;
40
41 /* convert handle size to multiple of sizeof(u32) */
42 handle_dwords = f_handle.handle_bytes >> 2;
43
44 /* we ask for a non connected handle */
45 retval = exportfs_encode_fh(path->dentry,
46 (struct fid *)handle->f_handle,
47 &handle_dwords, 0);
48 handle->handle_type = retval;
49 /* convert handle size to bytes */
50 handle_bytes = handle_dwords * sizeof(u32);
51 handle->handle_bytes = handle_bytes;
52 if ((handle->handle_bytes > f_handle.handle_bytes) ||
53 (retval == 255) || (retval == -ENOSPC)) {
54 /* As per the old exportfs_encode_fh documentation
55 * we could return ENOSPC to indicate overflow,
56 * but file systems always returned 255. So handle
57 * both values.
58 */
59 /*
60 * set the handle size to zero so we copy only
61 * non variable part of the file_handle
62 */
63 handle_bytes = 0;
64 retval = -EOVERFLOW;
65 } else
66 retval = 0;
67 /* copy the mount id */
68 if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) ||
69 copy_to_user(ufh, handle,
70 sizeof(struct file_handle) + handle_bytes))
71 retval = -EFAULT;
72 kfree(handle);
73 return retval;
74}
75
76/**
77 * sys_name_to_handle_at: convert name to handle
78 * @dfd: directory relative to which name is interpreted if not absolute
79 * @name: name that should be converted to handle.
80 * @handle: resulting file handle
81 * @mnt_id: mount id of the file system containing the file
82 * @flag: flag value to indicate whether to follow symlink or not
83 *
84 * @handle->handle_bytes indicates the space available to store the
85 * variable part of the file handle in bytes. If there is not
86 * enough space, the field is updated to return the minimum
87 * value required.
88 */
89SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
90 struct file_handle __user *, handle, int __user *, mnt_id,
91 int, flag)
92{
93 struct path path;
94 int lookup_flags;
95 int err;
96
97 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
98 return -EINVAL;
99
100 lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
101 if (flag & AT_EMPTY_PATH)
102 lookup_flags |= LOOKUP_EMPTY;
103 err = user_path_at(dfd, name, lookup_flags, &path);
104 if (!err) {
105 err = do_sys_name_to_handle(&path, handle, mnt_id);
106 path_put(&path);
107 }
108 return err;
109}
110
111static struct vfsmount *get_vfsmount_from_fd(int fd)
112{
113 struct path path;
114
115 if (fd == AT_FDCWD) {
116 struct fs_struct *fs = current->fs;
117 spin_lock(&fs->lock);
118 path = fs->pwd;
119 mntget(path.mnt);
120 spin_unlock(&fs->lock);
121 } else {
122 int fput_needed;
123 struct file *file = fget_light(fd, &fput_needed);
124 if (!file)
125 return ERR_PTR(-EBADF);
126 path = file->f_path;
127 mntget(path.mnt);
128 fput_light(file, fput_needed);
129 }
130 return path.mnt;
131}
132
133static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
134{
135 return 1;
136}
137
138static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
139 struct path *path)
140{
141 int retval = 0;
142 int handle_dwords;
143
144 path->mnt = get_vfsmount_from_fd(mountdirfd);
145 if (IS_ERR(path->mnt)) {
146 retval = PTR_ERR(path->mnt);
147 goto out_err;
148 }
149 /* change the handle size to multiple of sizeof(u32) */
150 handle_dwords = handle->handle_bytes >> 2;
151 path->dentry = exportfs_decode_fh(path->mnt,
152 (struct fid *)handle->f_handle,
153 handle_dwords, handle->handle_type,
154 vfs_dentry_acceptable, NULL);
155 if (IS_ERR(path->dentry)) {
156 retval = PTR_ERR(path->dentry);
157 goto out_mnt;
158 }
159 return 0;
160out_mnt:
161 mntput(path->mnt);
162out_err:
163 return retval;
164}
165
166static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
167 struct path *path)
168{
169 int retval = 0;
170 struct file_handle f_handle;
171 struct file_handle *handle = NULL;
172
173 /*
174 * With a handle we don't look at the execute bit on
175 * the directory. Ideally we would like CAP_DAC_SEARCH,
176 * but we don't have that.
177 */
178 if (!capable(CAP_DAC_READ_SEARCH)) {
179 retval = -EPERM;
180 goto out_err;
181 }
182 if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
183 retval = -EFAULT;
184 goto out_err;
185 }
186 if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
187 (f_handle.handle_bytes == 0)) {
188 retval = -EINVAL;
189 goto out_err;
190 }
191 handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
192 GFP_KERNEL);
193 if (!handle) {
194 retval = -ENOMEM;
195 goto out_err;
196 }
197 /* copy the full handle */
198 if (copy_from_user(handle, ufh,
199 sizeof(struct file_handle) +
200 f_handle.handle_bytes)) {
201 retval = -EFAULT;
202 goto out_handle;
203 }
204
205 retval = do_handle_to_path(mountdirfd, handle, path);
206
207out_handle:
208 kfree(handle);
209out_err:
210 return retval;
211}
212
213long do_handle_open(int mountdirfd,
214 struct file_handle __user *ufh, int open_flag)
215{
216 long retval = 0;
217 struct path path;
218 struct file *file;
219 int fd;
220
221 retval = handle_to_path(mountdirfd, ufh, &path);
222 if (retval)
223 return retval;
224
225 fd = get_unused_fd_flags(open_flag);
226 if (fd < 0) {
227 path_put(&path);
228 return fd;
229 }
230 file = file_open_root(path.dentry, path.mnt, "", open_flag);
231 if (IS_ERR(file)) {
232 put_unused_fd(fd);
233 retval = PTR_ERR(file);
234 } else {
235 retval = fd;
236 fsnotify_open(file);
237 fd_install(fd, file);
238 }
239 path_put(&path);
240 return retval;
241}
242
243/**
244 * sys_open_by_handle_at: Open the file handle
245 * @mountdirfd: directory file descriptor
246 * @handle: file handle to be opened
247 * @flags: open flags.
248 *
249 * @mountdirfd indicates the directory file descriptor
250 * of the mount point. The file handle is decoded relative
251 * to the vfsmount pointed to by @mountdirfd. The @flags
252 * value is the same as the open(2) flags.
254SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd,
255 struct file_handle __user *, handle,
256 int, flags)
257{
258 long ret;
259
260 if (force_o_largefile())
261 flags |= O_LARGEFILE;
262
263 ret = do_handle_open(mountdirfd, handle, flags);
264 return ret;
265}
diff --git a/fs/fifo.c b/fs/fifo.c
index 4e303c22d5ee..b1a524d798e7 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -66,8 +66,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
66 /* suppress POLLHUP until we have 66 /* suppress POLLHUP until we have
67 * seen a writer */ 67 * seen a writer */
68 filp->f_version = pipe->w_counter; 68 filp->f_version = pipe->w_counter;
69 } else 69 } else {
70 {
71 wait_for_partner(inode, &pipe->w_counter); 70 wait_for_partner(inode, &pipe->w_counter);
72 if(signal_pending(current)) 71 if(signal_pending(current))
73 goto err_rd; 72 goto err_rd;
diff --git a/fs/file_table.c b/fs/file_table.c
index eb36b6b17e26..01e4c1e8e6b6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -190,7 +190,8 @@ struct file *alloc_file(struct path *path, fmode_t mode,
190 file_take_write(file); 190 file_take_write(file);
191 WARN_ON(mnt_clone_write(path->mnt)); 191 WARN_ON(mnt_clone_write(path->mnt));
192 } 192 }
193 ima_counts_get(file); 193 if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
194 i_readcount_inc(path->dentry->d_inode);
194 return file; 195 return file;
195} 196}
196EXPORT_SYMBOL(alloc_file); 197EXPORT_SYMBOL(alloc_file);
@@ -246,11 +247,15 @@ static void __fput(struct file *file)
246 file->f_op->release(inode, file); 247 file->f_op->release(inode, file);
247 security_file_free(file); 248 security_file_free(file);
248 ima_file_free(file); 249 ima_file_free(file);
249 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL)) 250 if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL &&
251 !(file->f_mode & FMODE_PATH))) {
250 cdev_put(inode->i_cdev); 252 cdev_put(inode->i_cdev);
253 }
251 fops_put(file->f_op); 254 fops_put(file->f_op);
252 put_pid(file->f_owner.pid); 255 put_pid(file->f_owner.pid);
253 file_sb_list_del(file); 256 file_sb_list_del(file);
257 if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
258 i_readcount_dec(inode);
254 if (file->f_mode & FMODE_WRITE) 259 if (file->f_mode & FMODE_WRITE)
255 drop_file_write_access(file); 260 drop_file_write_access(file);
256 file->f_path.dentry = NULL; 261 file->f_path.dentry = NULL;
@@ -276,11 +281,10 @@ struct file *fget(unsigned int fd)
276 rcu_read_lock(); 281 rcu_read_lock();
277 file = fcheck_files(files, fd); 282 file = fcheck_files(files, fd);
278 if (file) { 283 if (file) {
279 if (!atomic_long_inc_not_zero(&file->f_count)) { 284 /* File object ref couldn't be taken */
280 /* File object ref couldn't be taken */ 285 if (file->f_mode & FMODE_PATH ||
281 rcu_read_unlock(); 286 !atomic_long_inc_not_zero(&file->f_count))
282 return NULL; 287 file = NULL;
283 }
284 } 288 }
285 rcu_read_unlock(); 289 rcu_read_unlock();
286 290
@@ -289,6 +293,25 @@ struct file *fget(unsigned int fd)
289 293
290EXPORT_SYMBOL(fget); 294EXPORT_SYMBOL(fget);
291 295
296struct file *fget_raw(unsigned int fd)
297{
298 struct file *file;
299 struct files_struct *files = current->files;
300
301 rcu_read_lock();
302 file = fcheck_files(files, fd);
303 if (file) {
304 /* File object ref couldn't be taken */
305 if (!atomic_long_inc_not_zero(&file->f_count))
306 file = NULL;
307 }
308 rcu_read_unlock();
309
310 return file;
311}
312
313EXPORT_SYMBOL(fget_raw);
314
292/* 315/*
293 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 316 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
294 * 317 *
@@ -313,6 +336,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
313 *fput_needed = 0; 336 *fput_needed = 0;
314 if (atomic_read(&files->count) == 1) { 337 if (atomic_read(&files->count) == 1) {
315 file = fcheck_files(files, fd); 338 file = fcheck_files(files, fd);
339 if (file && (file->f_mode & FMODE_PATH))
340 file = NULL;
341 } else {
342 rcu_read_lock();
343 file = fcheck_files(files, fd);
344 if (file) {
345 if (!(file->f_mode & FMODE_PATH) &&
346 atomic_long_inc_not_zero(&file->f_count))
347 *fput_needed = 1;
348 else
349 /* Didn't get the reference, someone's freed */
350 file = NULL;
351 }
352 rcu_read_unlock();
353 }
354
355 return file;
356}
357
358struct file *fget_raw_light(unsigned int fd, int *fput_needed)
359{
360 struct file *file;
361 struct files_struct *files = current->files;
362
363 *fput_needed = 0;
364 if (atomic_read(&files->count) == 1) {
365 file = fcheck_files(files, fd);
316 } else { 366 } else {
317 rcu_read_lock(); 367 rcu_read_lock();
318 file = fcheck_files(files, fd); 368 file = fcheck_files(files, fd);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 3e87cce5837d..b6cca47f7b07 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -305,7 +305,7 @@ static void cuse_gendev_release(struct device *dev)
305static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req) 305static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
306{ 306{
307 struct cuse_conn *cc = fc_to_cc(fc); 307 struct cuse_conn *cc = fc_to_cc(fc);
308 struct cuse_init_out *arg = &req->misc.cuse_init_out; 308 struct cuse_init_out *arg = req->out.args[0].value;
309 struct page *page = req->pages[0]; 309 struct page *page = req->pages[0];
310 struct cuse_devinfo devinfo = { }; 310 struct cuse_devinfo devinfo = { };
311 struct device *dev; 311 struct device *dev;
@@ -384,6 +384,7 @@ static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
384 dev_set_uevent_suppress(dev, 0); 384 dev_set_uevent_suppress(dev, 0);
385 kobject_uevent(&dev->kobj, KOBJ_ADD); 385 kobject_uevent(&dev->kobj, KOBJ_ADD);
386out: 386out:
387 kfree(arg);
387 __free_page(page); 388 __free_page(page);
388 return; 389 return;
389 390
@@ -405,6 +406,7 @@ static int cuse_send_init(struct cuse_conn *cc)
405 struct page *page; 406 struct page *page;
406 struct fuse_conn *fc = &cc->fc; 407 struct fuse_conn *fc = &cc->fc;
407 struct cuse_init_in *arg; 408 struct cuse_init_in *arg;
409 void *outarg;
408 410
409 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE); 411 BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
410 412
@@ -419,6 +421,10 @@ static int cuse_send_init(struct cuse_conn *cc)
419 if (!page) 421 if (!page)
420 goto err_put_req; 422 goto err_put_req;
421 423
424 outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
425 if (!outarg)
426 goto err_free_page;
427
422 arg = &req->misc.cuse_init_in; 428 arg = &req->misc.cuse_init_in;
423 arg->major = FUSE_KERNEL_VERSION; 429 arg->major = FUSE_KERNEL_VERSION;
424 arg->minor = FUSE_KERNEL_MINOR_VERSION; 430 arg->minor = FUSE_KERNEL_MINOR_VERSION;
@@ -429,7 +435,7 @@ static int cuse_send_init(struct cuse_conn *cc)
429 req->in.args[0].value = arg; 435 req->in.args[0].value = arg;
430 req->out.numargs = 2; 436 req->out.numargs = 2;
431 req->out.args[0].size = sizeof(struct cuse_init_out); 437 req->out.args[0].size = sizeof(struct cuse_init_out);
432 req->out.args[0].value = &req->misc.cuse_init_out; 438 req->out.args[0].value = outarg;
433 req->out.args[1].size = CUSE_INIT_INFO_MAX; 439 req->out.args[1].size = CUSE_INIT_INFO_MAX;
434 req->out.argvar = 1; 440 req->out.argvar = 1;
435 req->out.argpages = 1; 441 req->out.argpages = 1;
@@ -440,6 +446,8 @@ static int cuse_send_init(struct cuse_conn *cc)
440 446
441 return 0; 447 return 0;
442 448
449err_free_page:
450 __free_page(page);
443err_put_req: 451err_put_req:
444 fuse_put_request(fc, req); 452 fuse_put_request(fc, req);
445err: 453err:
@@ -458,7 +466,7 @@ static void cuse_fc_release(struct fuse_conn *fc)
458 * @file: file struct being opened 466 * @file: file struct being opened
459 * 467 *
460 * Userland CUSE server can create a CUSE device by opening /dev/cuse 468 * Userland CUSE server can create a CUSE device by opening /dev/cuse
461 * and replying to the initilaization request kernel sends. This 469 * and replying to the initialization request kernel sends. This
462 * function is responsible for handling CUSE device initialization. 470 * function is responsible for handling CUSE device initialization.
463 * Because the fd opened by this function is used during 471 * Because the fd opened by this function is used during
464 * initialization, this function only creates cuse_conn and sends 472 * initialization, this function only creates cuse_conn and sends
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cf8d28d1fbad..640fc229df10 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -737,14 +737,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
737 if (WARN_ON(PageMlocked(oldpage))) 737 if (WARN_ON(PageMlocked(oldpage)))
738 goto out_fallback_unlock; 738 goto out_fallback_unlock;
739 739
740 remove_from_page_cache(oldpage); 740 err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
741 page_cache_release(oldpage);
742
743 err = add_to_page_cache_locked(newpage, mapping, index, GFP_KERNEL);
744 if (err) { 741 if (err) {
745 printk(KERN_WARNING "fuse_try_move_page: failed to add page"); 742 unlock_page(newpage);
746 goto out_fallback_unlock; 743 return err;
747 } 744 }
745
748 page_cache_get(newpage); 746 page_cache_get(newpage);
749 747
750 if (!(buf->flags & PIPE_BUF_FLAG_LRU)) 748 if (!(buf->flags & PIPE_BUF_FLAG_LRU))
@@ -1910,6 +1908,21 @@ __acquires(fc->lock)
1910 kfree(dequeue_forget(fc, 1, NULL)); 1908 kfree(dequeue_forget(fc, 1, NULL));
1911} 1909}
1912 1910
1911static void end_polls(struct fuse_conn *fc)
1912{
1913 struct rb_node *p;
1914
1915 p = rb_first(&fc->polled_files);
1916
1917 while (p) {
1918 struct fuse_file *ff;
1919 ff = rb_entry(p, struct fuse_file, polled_node);
1920 wake_up_interruptible_all(&ff->poll_wait);
1921
1922 p = rb_next(p);
1923 }
1924}
1925
1913/* 1926/*
1914 * Abort all requests. 1927 * Abort all requests.
1915 * 1928 *
@@ -1937,6 +1950,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
1937 fc->blocked = 0; 1950 fc->blocked = 0;
1938 end_io_requests(fc); 1951 end_io_requests(fc);
1939 end_queued_requests(fc); 1952 end_queued_requests(fc);
1953 end_polls(fc);
1940 wake_up_all(&fc->waitq); 1954 wake_up_all(&fc->waitq);
1941 wake_up_all(&fc->blocked_waitq); 1955 wake_up_all(&fc->blocked_waitq);
1942 kill_fasync(&fc->fasync, SIGIO, POLL_IN); 1956 kill_fasync(&fc->fasync, SIGIO, POLL_IN);
@@ -1953,6 +1967,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
1953 fc->connected = 0; 1967 fc->connected = 0;
1954 fc->blocked = 0; 1968 fc->blocked = 0;
1955 end_queued_requests(fc); 1969 end_queued_requests(fc);
1970 end_polls(fc);
1956 wake_up_all(&fc->blocked_waitq); 1971 wake_up_all(&fc->blocked_waitq);
1957 spin_unlock(&fc->lock); 1972 spin_unlock(&fc->lock);
1958 fuse_conn_put(fc); 1973 fuse_conn_put(fc);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 83543b5ff941..c6ba49bd95b3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -158,10 +158,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
158{ 158{
159 struct inode *inode; 159 struct inode *inode;
160 160
161 if (nd->flags & LOOKUP_RCU) 161 inode = ACCESS_ONCE(entry->d_inode);
162 return -ECHILD;
163
164 inode = entry->d_inode;
165 if (inode && is_bad_inode(inode)) 162 if (inode && is_bad_inode(inode))
166 return 0; 163 return 0;
167 else if (fuse_dentry_time(entry) < get_jiffies_64()) { 164 else if (fuse_dentry_time(entry) < get_jiffies_64()) {
@@ -177,6 +174,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
177 if (!inode) 174 if (!inode)
178 return 0; 175 return 0;
179 176
177 if (nd->flags & LOOKUP_RCU)
178 return -ECHILD;
179
180 fc = get_fuse_conn(inode); 180 fc = get_fuse_conn(inode);
181 req = fuse_get_req(fc); 181 req = fuse_get_req(fc);
182 if (IS_ERR(req)) 182 if (IS_ERR(req))
@@ -970,6 +970,14 @@ static int fuse_access(struct inode *inode, int mask)
970 return err; 970 return err;
971} 971}
972 972
973static int fuse_perm_getattr(struct inode *inode, int flags)
974{
975 if (flags & IPERM_FLAG_RCU)
976 return -ECHILD;
977
978 return fuse_do_getattr(inode, NULL, NULL);
979}
980
973/* 981/*
974 * Check permission. The two basic access models of FUSE are: 982 * Check permission. The two basic access models of FUSE are:
975 * 983 *
@@ -989,9 +997,6 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
989 bool refreshed = false; 997 bool refreshed = false;
990 int err = 0; 998 int err = 0;
991 999
992 if (flags & IPERM_FLAG_RCU)
993 return -ECHILD;
994
995 if (!fuse_allow_task(fc, current)) 1000 if (!fuse_allow_task(fc, current))
996 return -EACCES; 1001 return -EACCES;
997 1002
@@ -1000,9 +1005,15 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1000 */ 1005 */
1001 if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) || 1006 if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
1002 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { 1007 ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1003 err = fuse_update_attributes(inode, NULL, NULL, &refreshed); 1008 struct fuse_inode *fi = get_fuse_inode(inode);
1004 if (err) 1009
1005 return err; 1010 if (fi->i_time < get_jiffies_64()) {
1011 refreshed = true;
1012
1013 err = fuse_perm_getattr(inode, flags);
1014 if (err)
1015 return err;
1016 }
1006 } 1017 }
1007 1018
1008 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1019 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
@@ -1012,7 +1023,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1012 attributes. This is also needed, because the root 1023 attributes. This is also needed, because the root
1013 node will at first have no permissions */ 1024 node will at first have no permissions */
1014 if (err == -EACCES && !refreshed) { 1025 if (err == -EACCES && !refreshed) {
1015 err = fuse_do_getattr(inode, NULL, NULL); 1026 err = fuse_perm_getattr(inode, flags);
1016 if (!err) 1027 if (!err)
1017 err = generic_permission(inode, mask, 1028 err = generic_permission(inode, mask,
1018 flags, NULL); 1029 flags, NULL);
@@ -1023,13 +1034,16 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1023 noticed immediately, only after the attribute 1034 noticed immediately, only after the attribute
1024 timeout has expired */ 1035 timeout has expired */
1025 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1036 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1037 if (flags & IPERM_FLAG_RCU)
1038 return -ECHILD;
1039
1026 err = fuse_access(inode, mask); 1040 err = fuse_access(inode, mask);
1027 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 1041 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
1028 if (!(inode->i_mode & S_IXUGO)) { 1042 if (!(inode->i_mode & S_IXUGO)) {
1029 if (refreshed) 1043 if (refreshed)
1030 return -EACCES; 1044 return -EACCES;
1031 1045
1032 err = fuse_do_getattr(inode, NULL, NULL); 1046 err = fuse_perm_getattr(inode, flags);
1033 if (!err && !(inode->i_mode & S_IXUGO)) 1047 if (!err && !(inode->i_mode & S_IXUGO))
1034 return -EACCES; 1048 return -EACCES;
1035 } 1049 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9e0832dbb1e3..6ea00734984e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -222,7 +222,7 @@ static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
222 rb_erase(&ff->polled_node, &fc->polled_files); 222 rb_erase(&ff->polled_node, &fc->polled_files);
223 spin_unlock(&fc->lock); 223 spin_unlock(&fc->lock);
224 224
225 wake_up_interruptible_sync(&ff->poll_wait); 225 wake_up_interruptible_all(&ff->poll_wait);
226 226
227 inarg->fh = ff->fh; 227 inarg->fh = ff->fh;
228 inarg->flags = flags; 228 inarg->flags = flags;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d4286947bc2c..b788becada76 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,7 +272,6 @@ struct fuse_req {
272 struct fuse_init_in init_in; 272 struct fuse_init_in init_in;
273 struct fuse_init_out init_out; 273 struct fuse_init_out init_out;
274 struct cuse_init_in cuse_init_in; 274 struct cuse_init_in cuse_init_in;
275 struct cuse_init_out cuse_init_out;
276 struct { 275 struct {
277 struct fuse_read_in in; 276 struct fuse_read_in in;
278 u64 attr_ver; 277 u64 attr_ver;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9e3f68cc1bd1..051b1a084528 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
637 u64 nodeid; 637 u64 nodeid;
638 u32 generation; 638 u32 generation;
639 639
640 if (*max_len < len) 640 if (*max_len < len) {
641 *max_len = len;
641 return 255; 642 return 255;
643 }
642 644
643 nodeid = get_fuse_inode(inode)->nodeid; 645 nodeid = get_fuse_inode(inode)->nodeid;
644 generation = inode->i_generation; 646 generation = inode->i_generation;
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 06c48a891832..8f26d1a58912 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -74,7 +74,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
74 return -EINVAL; 74 return -EINVAL;
75 if (S_ISLNK(inode->i_mode)) 75 if (S_ISLNK(inode->i_mode))
76 return -EOPNOTSUPP; 76 return -EOPNOTSUPP;
77 if (!is_owner_or_cap(inode)) 77 if (!inode_owner_or_capable(inode))
78 return -EPERM; 78 return -EPERM;
79 if (value) { 79 if (value) {
80 acl = posix_acl_from_xattr(value, size); 80 acl = posix_acl_from_xattr(value, size);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 21f7e46da4c0..f3d23ef4e876 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS := -I$(src) 1ccflags-y := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o inode.o log.o lops.o main.o meta_io.o \
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 7118f1a780a9..cbc07155b1a0 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU) 83 if (flags & IPERM_FLAG_RCU) {
84 return -ECHILD; 84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
85 return -ECHILD;
86 return -EAGAIN;
87 }
85 88
86 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); 89 acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
87 if (IS_ERR(acl)) 90 if (IS_ERR(acl))
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 4f36f8832b9b..aad77e4f61b5 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -695,6 +695,7 @@ out:
695 if (error == 0) 695 if (error == 0)
696 return 0; 696 return 0;
697 697
698 unlock_page(page);
698 page_cache_release(page); 699 page_cache_release(page);
699 700
700 gfs2_trans_end(sdp); 701 gfs2_trans_end(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3c4039d5eef1..ef3dc4b9fae2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -21,6 +21,7 @@
21#include "meta_io.h" 21#include "meta_io.h"
22#include "quota.h" 22#include "quota.h"
23#include "rgrp.h" 23#include "rgrp.h"
24#include "super.h"
24#include "trans.h" 25#include "trans.h"
25#include "dir.h" 26#include "dir.h"
26#include "util.h" 27#include "util.h"
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
757 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 758 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
758 struct gfs2_rgrp_list rlist; 759 struct gfs2_rgrp_list rlist;
759 u64 bn, bstart; 760 u64 bn, bstart;
760 u32 blen; 761 u32 blen, btotal;
761 __be64 *p; 762 __be64 *p;
762 unsigned int rg_blocks = 0; 763 unsigned int rg_blocks = 0;
763 int metadata; 764 int metadata;
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
839 840
840 bstart = 0; 841 bstart = 0;
841 blen = 0; 842 blen = 0;
843 btotal = 0;
842 844
843 for (p = top; p < bottom; p++) { 845 for (p = top; p < bottom; p++) {
844 if (!*p) 846 if (!*p)
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
851 else { 853 else {
852 if (bstart) { 854 if (bstart) {
853 if (metadata) 855 if (metadata)
854 gfs2_free_meta(ip, bstart, blen); 856 __gfs2_free_meta(ip, bstart, blen);
855 else 857 else
856 gfs2_free_data(ip, bstart, blen); 858 __gfs2_free_data(ip, bstart, blen);
859
860 btotal += blen;
857 } 861 }
858 862
859 bstart = bn; 863 bstart = bn;
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
865 } 869 }
866 if (bstart) { 870 if (bstart) {
867 if (metadata) 871 if (metadata)
868 gfs2_free_meta(ip, bstart, blen); 872 __gfs2_free_meta(ip, bstart, blen);
869 else 873 else
870 gfs2_free_data(ip, bstart, blen); 874 __gfs2_free_data(ip, bstart, blen);
875
876 btotal += blen;
871 } 877 }
872 878
879 gfs2_statfs_change(sdp, 0, +btotal, 0);
880 gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
881 ip->i_inode.i_gid);
882
873 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; 883 ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
874 884
875 gfs2_dinode_out(ip, dibh->b_data); 885 gfs2_dinode_out(ip, dibh->b_data);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 4a456338b873..0da8da2c991d 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
44 int error; 44 int error;
45 int had_lock = 0; 45 int had_lock = 0;
46 46
47 if (nd->flags & LOOKUP_RCU) 47 if (nd && nd->flags & LOOKUP_RCU)
48 return -ECHILD; 48 return -ECHILD;
49 49
50 parent = dget_parent(dentry); 50 parent = dget_parent(dentry);
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9023db8184f9..b5a5e60df0d5 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
36 struct super_block *sb = inode->i_sb; 36 struct super_block *sb = inode->i_sb;
37 struct gfs2_inode *ip = GFS2_I(inode); 37 struct gfs2_inode *ip = GFS2_I(inode);
38 38
39 if (*len < GFS2_SMALL_FH_SIZE || 39 if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
40 (connectable && *len < GFS2_LARGE_FH_SIZE)) 40 *len = GFS2_LARGE_FH_SIZE;
41 return 255; 41 return 255;
42 } else if (*len < GFS2_SMALL_FH_SIZE) {
43 *len = GFS2_SMALL_FH_SIZE;
44 return 255;
45 }
42 46
43 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); 47 fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32);
44 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); 48 fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7cfdcb913363..b2682e073eee 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -221,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
221 goto out_drop_write; 221 goto out_drop_write;
222 222
223 error = -EACCES; 223 error = -EACCES;
224 if (!is_owner_or_cap(inode)) 224 if (!inode_owner_or_capable(inode))
225 goto out; 225 goto out;
226 226
227 error = 0; 227 error = 0;
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
448{ 448{
449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); 449 struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
450 450
451 if (!(file->f_flags & O_NOATIME)) { 451 if (!(file->f_flags & O_NOATIME) &&
452 !IS_NOATIME(&ip->i_inode)) {
452 struct gfs2_holder i_gh; 453 struct gfs2_holder i_gh;
453 int error; 454 int error;
454 455
455 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 456 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
456 error = gfs2_glock_nq(&i_gh); 457 error = gfs2_glock_nq(&i_gh);
457 file_accessed(file); 458 if (error == 0) {
458 if (error == 0) 459 file_accessed(file);
459 gfs2_glock_dq_uninit(&i_gh); 460 gfs2_glock_dq(&i_gh);
461 }
462 gfs2_holder_uninit(&i_gh);
463 if (error)
464 return error;
460 } 465 }
461 vma->vm_ops = &gfs2_vm_ops; 466 vma->vm_ops = &gfs2_vm_ops;
462 vma->vm_flags |= VM_CAN_NONLINEAR; 467 vma->vm_flags |= VM_CAN_NONLINEAR;
@@ -617,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from,
617{ 622{
618 struct gfs2_inode *ip = GFS2_I(page->mapping->host); 623 struct gfs2_inode *ip = GFS2_I(page->mapping->host);
619 624
620 page_zero_new_buffers(page, from, to); 625 zero_user(page, from, to-from);
621 flush_dcache_page(page);
622 mark_page_accessed(page); 626 mark_page_accessed(page);
623 627
624 if (!gfs2_is_writeback(ip)) 628 if (!gfs2_is_writeback(ip))
@@ -627,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from,
627 block_commit_write(page, from, to); 631 block_commit_write(page, from, to);
628} 632}
629 633
630static int write_empty_blocks(struct page *page, unsigned from, unsigned to) 634static int needs_empty_write(sector_t block, struct inode *inode)
631{ 635{
632 unsigned start, end, next;
633 struct buffer_head *bh, *head;
634 int error; 636 int error;
637 struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
635 638
636 if (!page_has_buffers(page)) { 639 bh_map.b_size = 1 << inode->i_blkbits;
637 error = __block_write_begin(page, from, to - from, gfs2_block_map); 640 error = gfs2_block_map(inode, block, &bh_map, 0);
638 if (unlikely(error)) 641 if (unlikely(error))
639 return error; 642 return error;
643 return !buffer_mapped(&bh_map);
644}
640 645
641 empty_write_end(page, from, to); 646static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
642 return 0; 647{
643 } 648 struct inode *inode = page->mapping->host;
649 unsigned start, end, next, blksize;
650 sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
651 int ret;
644 652
645 bh = head = page_buffers(page); 653 blksize = 1 << inode->i_blkbits;
646 next = end = 0; 654 next = end = 0;
647 while (next < from) { 655 while (next < from) {
648 next += bh->b_size; 656 next += blksize;
649 bh = bh->b_this_page; 657 block++;
650 } 658 }
651 start = next; 659 start = next;
652 do { 660 do {
653 next += bh->b_size; 661 next += blksize;
654 if (buffer_mapped(bh)) { 662 ret = needs_empty_write(block, inode);
663 if (unlikely(ret < 0))
664 return ret;
665 if (ret == 0) {
655 if (end) { 666 if (end) {
656 error = __block_write_begin(page, start, end - start, 667 ret = __block_write_begin(page, start, end - start,
657 gfs2_block_map); 668 gfs2_block_map);
658 if (unlikely(error)) 669 if (unlikely(ret))
659 return error; 670 return ret;
660 empty_write_end(page, start, end); 671 empty_write_end(page, start, end);
661 end = 0; 672 end = 0;
662 } 673 }
@@ -664,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
664 } 675 }
665 else 676 else
666 end = next; 677 end = next;
667 bh = bh->b_this_page; 678 block++;
668 } while (next < to); 679 } while (next < to);
669 680
670 if (end) { 681 if (end) {
671 error = __block_write_begin(page, start, end - start, gfs2_block_map); 682 ret = __block_write_begin(page, start, end - start, gfs2_block_map);
672 if (unlikely(error)) 683 if (unlikely(ret))
673 return error; 684 return ret;
674 empty_write_end(page, start, end); 685 empty_write_end(page, start, end);
675 } 686 }
676 687
@@ -976,8 +987,10 @@ static void do_unflock(struct file *file, struct file_lock *fl)
976 987
977 mutex_lock(&fp->f_fl_mutex); 988 mutex_lock(&fp->f_fl_mutex);
978 flock_lock_file_wait(file, fl); 989 flock_lock_file_wait(file, fl);
979 if (fl_gh->gh_gl) 990 if (fl_gh->gh_gl) {
980 gfs2_glock_dq_uninit(fl_gh); 991 gfs2_glock_dq_wait(fl_gh);
992 gfs2_holder_uninit(fl_gh);
993 }
981 mutex_unlock(&fp->f_fl_mutex); 994 mutex_unlock(&fp->f_fl_mutex);
982} 995}
983 996
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 7cd9a5a68d59..e2431313491f 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -26,6 +26,9 @@
26#include <linux/freezer.h> 26#include <linux/freezer.h>
27#include <linux/workqueue.h> 27#include <linux/workqueue.h>
28#include <linux/jiffies.h> 28#include <linux/jiffies.h>
29#include <linux/rcupdate.h>
30#include <linux/rculist_bl.h>
31#include <linux/bit_spinlock.h>
29 32
30#include "gfs2.h" 33#include "gfs2.h"
31#include "incore.h" 34#include "incore.h"
@@ -41,10 +44,6 @@
41#define CREATE_TRACE_POINTS 44#define CREATE_TRACE_POINTS
42#include "trace_gfs2.h" 45#include "trace_gfs2.h"
43 46
44struct gfs2_gl_hash_bucket {
45 struct hlist_head hb_list;
46};
47
48struct gfs2_glock_iter { 47struct gfs2_glock_iter {
49 int hash; /* hash bucket index */ 48 int hash; /* hash bucket index */
50 struct gfs2_sbd *sdp; /* incore superblock */ 49 struct gfs2_sbd *sdp; /* incore superblock */
@@ -54,7 +53,6 @@ struct gfs2_glock_iter {
54 53
55typedef void (*glock_examiner) (struct gfs2_glock * gl); 54typedef void (*glock_examiner) (struct gfs2_glock * gl);
56 55
57static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
58static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); 56static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl);
59#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) 57#define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0)
60static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); 58static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock);
70#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) 68#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
71#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) 69#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1)
72 70
73static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; 71static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE];
74static struct dentry *gfs2_root; 72static struct dentry *gfs2_root;
75 73
76/*
77 * Despite what you might think, the numbers below are not arbitrary :-)
78 * They are taken from the ipv4 routing hash code, which is well tested
79 * and thus should be nearly optimal. Later on we might tweek the numbers
80 * but for now this should be fine.
81 *
82 * The reason for putting the locks in a separate array from the list heads
83 * is that we can have fewer locks than list heads and save memory. We use
84 * the same hash function for both, but with a different hash mask.
85 */
86#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
87 defined(CONFIG_PROVE_LOCKING)
88
89#ifdef CONFIG_LOCKDEP
90# define GL_HASH_LOCK_SZ 256
91#else
92# if NR_CPUS >= 32
93# define GL_HASH_LOCK_SZ 4096
94# elif NR_CPUS >= 16
95# define GL_HASH_LOCK_SZ 2048
96# elif NR_CPUS >= 8
97# define GL_HASH_LOCK_SZ 1024
98# elif NR_CPUS >= 4
99# define GL_HASH_LOCK_SZ 512
100# else
101# define GL_HASH_LOCK_SZ 256
102# endif
103#endif
104
105/* We never want more locks than chains */
106#if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ
107# undef GL_HASH_LOCK_SZ
108# define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE
109#endif
110
111static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ];
112
113static inline rwlock_t *gl_lock_addr(unsigned int x)
114{
115 return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)];
116}
117#else /* not SMP, so no spinlocks required */
118static inline rwlock_t *gl_lock_addr(unsigned int x)
119{
120 return NULL;
121}
122#endif
123
124/** 74/**
125 * gl_hash() - Turn glock number into hash bucket number 75 * gl_hash() - Turn glock number into hash bucket number
126 * @lock: The glock number 76 * @lock: The glock number
@@ -141,25 +91,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp,
141 return h; 91 return h;
142} 92}
143 93
144/** 94static inline void spin_lock_bucket(unsigned int hash)
145 * glock_free() - Perform a few checks and then release struct gfs2_glock 95{
146 * @gl: The glock to release 96 struct hlist_bl_head *bl = &gl_hash_table[hash];
147 * 97 bit_spin_lock(0, (unsigned long *)bl);
148 * Also calls lock module to release its internal structure for this glock. 98}
149 *
150 */
151 99
152static void glock_free(struct gfs2_glock *gl) 100static inline void spin_unlock_bucket(unsigned int hash)
101{
102 struct hlist_bl_head *bl = &gl_hash_table[hash];
103 __bit_spin_unlock(0, (unsigned long *)bl);
104}
105
106static void gfs2_glock_dealloc(struct rcu_head *rcu)
107{
108 struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
109
110 if (gl->gl_ops->go_flags & GLOF_ASPACE)
111 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
112 else
113 kmem_cache_free(gfs2_glock_cachep, gl);
114}
115
116void gfs2_glock_free(struct gfs2_glock *gl)
153{ 117{
154 struct gfs2_sbd *sdp = gl->gl_sbd; 118 struct gfs2_sbd *sdp = gl->gl_sbd;
155 struct address_space *mapping = gfs2_glock2aspace(gl);
156 struct kmem_cache *cachep = gfs2_glock_cachep;
157 119
158 GLOCK_BUG_ON(gl, mapping && mapping->nrpages); 120 call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
159 trace_gfs2_glock_put(gl); 121 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
160 if (mapping) 122 wake_up(&sdp->sd_glock_wait);
161 cachep = gfs2_glock_aspace_cachep;
162 sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl);
163} 123}
164 124
165/** 125/**
@@ -185,34 +145,49 @@ static int demote_ok(const struct gfs2_glock *gl)
185{ 145{
186 const struct gfs2_glock_operations *glops = gl->gl_ops; 146 const struct gfs2_glock_operations *glops = gl->gl_ops;
187 147
148 /* assert_spin_locked(&gl->gl_spin); */
149
188 if (gl->gl_state == LM_ST_UNLOCKED) 150 if (gl->gl_state == LM_ST_UNLOCKED)
189 return 0; 151 return 0;
190 if (!list_empty(&gl->gl_holders)) 152 if (test_bit(GLF_LFLUSH, &gl->gl_flags))
153 return 0;
154 if ((gl->gl_name.ln_type != LM_TYPE_INODE) &&
155 !list_empty(&gl->gl_holders))
191 return 0; 156 return 0;
192 if (glops->go_demote_ok) 157 if (glops->go_demote_ok)
193 return glops->go_demote_ok(gl); 158 return glops->go_demote_ok(gl);
194 return 1; 159 return 1;
195} 160}
196 161
162
197/** 163/**
198 * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list 164 * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list
199 * @gl: the glock 165 * @gl: the glock
200 * 166 *
167 * If the glock is demotable, then we add it (or move it) to the end
168 * of the glock LRU list.
201 */ 169 */
202 170
203static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) 171static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
204{ 172{
205 int may_reclaim; 173 if (demote_ok(gl)) {
206 may_reclaim = (demote_ok(gl) && 174 spin_lock(&lru_lock);
207 (atomic_read(&gl->gl_ref) == 1 || 175
208 (gl->gl_name.ln_type == LM_TYPE_INODE && 176 if (!list_empty(&gl->gl_lru))
209 atomic_read(&gl->gl_ref) <= 2))); 177 list_del_init(&gl->gl_lru);
210 spin_lock(&lru_lock); 178 else
211 if (list_empty(&gl->gl_lru) && may_reclaim) { 179 atomic_inc(&lru_count);
180
212 list_add_tail(&gl->gl_lru, &lru_list); 181 list_add_tail(&gl->gl_lru, &lru_list);
213 atomic_inc(&lru_count); 182 spin_unlock(&lru_lock);
214 } 183 }
215 spin_unlock(&lru_lock); 184}
185
186void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl)
187{
188 spin_lock(&gl->gl_spin);
189 __gfs2_glock_schedule_for_reclaim(gl);
190 spin_unlock(&gl->gl_spin);
216} 191}
217 192
218/** 193/**
@@ -227,7 +202,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
227{ 202{
228 if (atomic_dec_and_test(&gl->gl_ref)) 203 if (atomic_dec_and_test(&gl->gl_ref))
229 GLOCK_BUG_ON(gl, 1); 204 GLOCK_BUG_ON(gl, 1);
230 gfs2_glock_schedule_for_reclaim(gl);
231} 205}
232 206
233/** 207/**
@@ -236,30 +210,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl)
236 * 210 *
237 */ 211 */
238 212
239int gfs2_glock_put(struct gfs2_glock *gl) 213void gfs2_glock_put(struct gfs2_glock *gl)
240{ 214{
241 int rv = 0; 215 struct gfs2_sbd *sdp = gl->gl_sbd;
216 struct address_space *mapping = gfs2_glock2aspace(gl);
242 217
243 write_lock(gl_lock_addr(gl->gl_hash)); 218 if (atomic_dec_and_test(&gl->gl_ref)) {
244 if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { 219 spin_lock_bucket(gl->gl_hash);
245 hlist_del(&gl->gl_list); 220 hlist_bl_del_rcu(&gl->gl_list);
221 spin_unlock_bucket(gl->gl_hash);
222 spin_lock(&lru_lock);
246 if (!list_empty(&gl->gl_lru)) { 223 if (!list_empty(&gl->gl_lru)) {
247 list_del_init(&gl->gl_lru); 224 list_del_init(&gl->gl_lru);
248 atomic_dec(&lru_count); 225 atomic_dec(&lru_count);
249 } 226 }
250 spin_unlock(&lru_lock); 227 spin_unlock(&lru_lock);
251 write_unlock(gl_lock_addr(gl->gl_hash));
252 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); 228 GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
253 glock_free(gl); 229 GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
254 rv = 1; 230 trace_gfs2_glock_put(gl);
255 goto out; 231 sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
256 } 232 }
257 spin_lock(&gl->gl_spin);
258 gfs2_glock_schedule_for_reclaim(gl);
259 spin_unlock(&gl->gl_spin);
260 write_unlock(gl_lock_addr(gl->gl_hash));
261out:
262 return rv;
263} 233}
264 234
265/** 235/**
@@ -275,17 +245,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash,
275 const struct lm_lockname *name) 245 const struct lm_lockname *name)
276{ 246{
277 struct gfs2_glock *gl; 247 struct gfs2_glock *gl;
278 struct hlist_node *h; 248 struct hlist_bl_node *h;
279 249
280 hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { 250 hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) {
281 if (!lm_name_equal(&gl->gl_name, name)) 251 if (!lm_name_equal(&gl->gl_name, name))
282 continue; 252 continue;
283 if (gl->gl_sbd != sdp) 253 if (gl->gl_sbd != sdp)
284 continue; 254 continue;
285 255 if (atomic_inc_not_zero(&gl->gl_ref))
286 atomic_inc(&gl->gl_ref); 256 return gl;
287
288 return gl;
289 } 257 }
290 258
291 return NULL; 259 return NULL;
@@ -743,10 +711,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
743 struct gfs2_glock *gl, *tmp; 711 struct gfs2_glock *gl, *tmp;
744 unsigned int hash = gl_hash(sdp, &name); 712 unsigned int hash = gl_hash(sdp, &name);
745 struct address_space *mapping; 713 struct address_space *mapping;
714 struct kmem_cache *cachep;
746 715
747 read_lock(gl_lock_addr(hash)); 716 rcu_read_lock();
748 gl = search_bucket(hash, sdp, &name); 717 gl = search_bucket(hash, sdp, &name);
749 read_unlock(gl_lock_addr(hash)); 718 rcu_read_unlock();
750 719
751 *glp = gl; 720 *glp = gl;
752 if (gl) 721 if (gl)
@@ -755,9 +724,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
755 return -ENOENT; 724 return -ENOENT;
756 725
757 if (glops->go_flags & GLOF_ASPACE) 726 if (glops->go_flags & GLOF_ASPACE)
758 gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); 727 cachep = gfs2_glock_aspace_cachep;
759 else 728 else
760 gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); 729 cachep = gfs2_glock_cachep;
730 gl = kmem_cache_alloc(cachep, GFP_KERNEL);
761 if (!gl) 731 if (!gl)
762 return -ENOMEM; 732 return -ENOMEM;
763 733
@@ -790,15 +760,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
790 mapping->writeback_index = 0; 760 mapping->writeback_index = 0;
791 } 761 }
792 762
793 write_lock(gl_lock_addr(hash)); 763 spin_lock_bucket(hash);
794 tmp = search_bucket(hash, sdp, &name); 764 tmp = search_bucket(hash, sdp, &name);
795 if (tmp) { 765 if (tmp) {
796 write_unlock(gl_lock_addr(hash)); 766 spin_unlock_bucket(hash);
797 glock_free(gl); 767 kmem_cache_free(cachep, gl);
768 atomic_dec(&sdp->sd_glock_disposal);
798 gl = tmp; 769 gl = tmp;
799 } else { 770 } else {
800 hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); 771 hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]);
801 write_unlock(gl_lock_addr(hash)); 772 spin_unlock_bucket(hash);
802 } 773 }
803 774
804 *glp = gl; 775 *glp = gl;
@@ -1007,13 +978,13 @@ fail:
1007 insert_pt = &gh2->gh_list; 978 insert_pt = &gh2->gh_list;
1008 } 979 }
1009 set_bit(GLF_QUEUED, &gl->gl_flags); 980 set_bit(GLF_QUEUED, &gl->gl_flags);
981 trace_gfs2_glock_queue(gh, 1);
1010 if (likely(insert_pt == NULL)) { 982 if (likely(insert_pt == NULL)) {
1011 list_add_tail(&gh->gh_list, &gl->gl_holders); 983 list_add_tail(&gh->gh_list, &gl->gl_holders);
1012 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) 984 if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
1013 goto do_cancel; 985 goto do_cancel;
1014 return; 986 return;
1015 } 987 }
1016 trace_gfs2_glock_queue(gh, 1);
1017 list_add_tail(&gh->gh_list, insert_pt); 988 list_add_tail(&gh->gh_list, insert_pt);
1018do_cancel: 989do_cancel:
1019 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); 990 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1113,6 +1084,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1113 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1084 !test_bit(GLF_DEMOTE, &gl->gl_flags))
1114 fast_path = 1; 1085 fast_path = 1;
1115 } 1086 }
1087 __gfs2_glock_schedule_for_reclaim(gl);
1116 trace_gfs2_glock_queue(gh, 0); 1088 trace_gfs2_glock_queue(gh, 0);
1117 spin_unlock(&gl->gl_spin); 1089 spin_unlock(&gl->gl_spin);
1118 if (likely(fast_path)) 1090 if (likely(fast_path))
@@ -1276,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1276 1248
1277void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) 1249void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1278{ 1250{
1279 unsigned int x; 1251 while (num_gh--)
1280 1252 gfs2_glock_dq(&ghs[num_gh]);
1281 for (x = 0; x < num_gh; x++)
1282 gfs2_glock_dq(&ghs[x]);
1283} 1253}
1284 1254
1285/** 1255/**
@@ -1291,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
1291 1261
1292void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) 1262void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
1293{ 1263{
1294 unsigned int x; 1264 while (num_gh--)
1295 1265 gfs2_glock_dq_uninit(&ghs[num_gh]);
1296 for (x = 0; x < num_gh; x++)
1297 gfs2_glock_dq_uninit(&ghs[x]);
1298} 1266}
1299 1267
1300void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) 1268void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
@@ -1440,42 +1408,30 @@ static struct shrinker glock_shrinker = {
1440 * @sdp: the filesystem 1408 * @sdp: the filesystem
1441 * @bucket: the bucket 1409 * @bucket: the bucket
1442 * 1410 *
1443 * Returns: 1 if the bucket has entries
1444 */ 1411 */
1445 1412
1446static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, 1413static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp,
1447 unsigned int hash) 1414 unsigned int hash)
1448{ 1415{
1449 struct gfs2_glock *gl, *prev = NULL; 1416 struct gfs2_glock *gl;
1450 int has_entries = 0; 1417 struct hlist_bl_head *head = &gl_hash_table[hash];
1451 struct hlist_head *head = &gl_hash_table[hash].hb_list; 1418 struct hlist_bl_node *pos;
1452 1419
1453 read_lock(gl_lock_addr(hash)); 1420 rcu_read_lock();
1454 /* Can't use hlist_for_each_entry - don't want prefetch here */ 1421 hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) {
1455 if (hlist_empty(head)) 1422 if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref))
1456 goto out;
1457 gl = list_entry(head->first, struct gfs2_glock, gl_list);
1458 while(1) {
1459 if (!sdp || gl->gl_sbd == sdp) {
1460 gfs2_glock_hold(gl);
1461 read_unlock(gl_lock_addr(hash));
1462 if (prev)
1463 gfs2_glock_put(prev);
1464 prev = gl;
1465 examiner(gl); 1423 examiner(gl);
1466 has_entries = 1;
1467 read_lock(gl_lock_addr(hash));
1468 }
1469 if (gl->gl_list.next == NULL)
1470 break;
1471 gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list);
1472 } 1424 }
1473out: 1425 rcu_read_unlock();
1474 read_unlock(gl_lock_addr(hash));
1475 if (prev)
1476 gfs2_glock_put(prev);
1477 cond_resched(); 1426 cond_resched();
1478 return has_entries; 1427}
1428
1429static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
1430{
1431 unsigned x;
1432
1433 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1434 examine_bucket(examiner, sdp, x);
1479} 1435}
1480 1436
1481 1437
@@ -1529,10 +1485,21 @@ static void clear_glock(struct gfs2_glock *gl)
1529 1485
1530void gfs2_glock_thaw(struct gfs2_sbd *sdp) 1486void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1531{ 1487{
1532 unsigned x; 1488 glock_hash_walk(thaw_glock, sdp);
1489}
1533 1490
1534 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) 1491static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1535 examine_bucket(thaw_glock, sdp, x); 1492{
1493 int ret;
1494 spin_lock(&gl->gl_spin);
1495 ret = __dump_glock(seq, gl);
1496 spin_unlock(&gl->gl_spin);
1497 return ret;
1498}
1499
1500static void dump_glock_func(struct gfs2_glock *gl)
1501{
1502 dump_glock(NULL, gl);
1536} 1503}
1537 1504
1538/** 1505/**
@@ -1545,13 +1512,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp)
1545 1512
1546void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) 1513void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
1547{ 1514{
1548 unsigned int x; 1515 glock_hash_walk(clear_glock, sdp);
1549
1550 for (x = 0; x < GFS2_GL_HASH_SIZE; x++)
1551 examine_bucket(clear_glock, sdp, x);
1552 flush_workqueue(glock_workqueue); 1516 flush_workqueue(glock_workqueue);
1553 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); 1517 wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
1554 gfs2_dump_lockstate(sdp); 1518 glock_hash_walk(dump_glock_func, sdp);
1555} 1519}
1556 1520
1557void gfs2_glock_finish_truncate(struct gfs2_inode *ip) 1521void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
@@ -1717,66 +1681,15 @@ out:
1717 return error; 1681 return error;
1718} 1682}
1719 1683
1720static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl)
1721{
1722 int ret;
1723 spin_lock(&gl->gl_spin);
1724 ret = __dump_glock(seq, gl);
1725 spin_unlock(&gl->gl_spin);
1726 return ret;
1727}
1728 1684
1729/**
1730 * gfs2_dump_lockstate - print out the current lockstate
1731 * @sdp: the filesystem
1732 * @ub: the buffer to copy the information into
1733 *
1734 * If @ub is NULL, dump the lockstate to the console.
1735 *
1736 */
1737
1738static int gfs2_dump_lockstate(struct gfs2_sbd *sdp)
1739{
1740 struct gfs2_glock *gl;
1741 struct hlist_node *h;
1742 unsigned int x;
1743 int error = 0;
1744
1745 for (x = 0; x < GFS2_GL_HASH_SIZE; x++) {
1746
1747 read_lock(gl_lock_addr(x));
1748
1749 hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) {
1750 if (gl->gl_sbd != sdp)
1751 continue;
1752
1753 error = dump_glock(NULL, gl);
1754 if (error)
1755 break;
1756 }
1757
1758 read_unlock(gl_lock_addr(x));
1759
1760 if (error)
1761 break;
1762 }
1763
1764
1765 return error;
1766}
1767 1685
1768 1686
1769int __init gfs2_glock_init(void) 1687int __init gfs2_glock_init(void)
1770{ 1688{
1771 unsigned i; 1689 unsigned i;
1772 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { 1690 for(i = 0; i < GFS2_GL_HASH_SIZE; i++) {
1773 INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); 1691 INIT_HLIST_BL_HEAD(&gl_hash_table[i]);
1774 }
1775#ifdef GL_HASH_LOCK_SZ
1776 for(i = 0; i < GL_HASH_LOCK_SZ; i++) {
1777 rwlock_init(&gl_hash_locks[i]);
1778 } 1692 }
1779#endif
1780 1693
1781 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | 1694 glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
1782 WQ_HIGHPRI | WQ_FREEZABLE, 0); 1695 WQ_HIGHPRI | WQ_FREEZABLE, 0);
@@ -1802,62 +1715,54 @@ void gfs2_glock_exit(void)
1802 destroy_workqueue(gfs2_delete_workqueue); 1715 destroy_workqueue(gfs2_delete_workqueue);
1803} 1716}
1804 1717
1718static inline struct gfs2_glock *glock_hash_chain(unsigned hash)
1719{
1720 return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]),
1721 struct gfs2_glock, gl_list);
1722}
1723
1724static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl)
1725{
1726 return hlist_bl_entry(rcu_dereference(gl->gl_list.next),
1727 struct gfs2_glock, gl_list);
1728}
1729
1805static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) 1730static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi)
1806{ 1731{
1807 struct gfs2_glock *gl; 1732 struct gfs2_glock *gl;
1808 1733
1809restart: 1734 do {
1810 read_lock(gl_lock_addr(gi->hash)); 1735 gl = gi->gl;
1811 gl = gi->gl; 1736 if (gl) {
1812 if (gl) { 1737 gi->gl = glock_hash_next(gl);
1813 gi->gl = hlist_entry(gl->gl_list.next, 1738 } else {
1814 struct gfs2_glock, gl_list); 1739 gi->gl = glock_hash_chain(gi->hash);
1815 } else { 1740 }
1816 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, 1741 while (gi->gl == NULL) {
1817 struct gfs2_glock, gl_list); 1742 gi->hash++;
1818 } 1743 if (gi->hash >= GFS2_GL_HASH_SIZE) {
1819 if (gi->gl) 1744 rcu_read_unlock();
1820 gfs2_glock_hold(gi->gl); 1745 return 1;
1821 read_unlock(gl_lock_addr(gi->hash)); 1746 }
1822 if (gl) 1747 gi->gl = glock_hash_chain(gi->hash);
1823 gfs2_glock_put(gl); 1748 }
1824 while (gi->gl == NULL) { 1749 /* Skip entries for other sb and dead entries */
1825 gi->hash++; 1750 } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0);
1826 if (gi->hash >= GFS2_GL_HASH_SIZE)
1827 return 1;
1828 read_lock(gl_lock_addr(gi->hash));
1829 gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first,
1830 struct gfs2_glock, gl_list);
1831 if (gi->gl)
1832 gfs2_glock_hold(gi->gl);
1833 read_unlock(gl_lock_addr(gi->hash));
1834 }
1835
1836 if (gi->sdp != gi->gl->gl_sbd)
1837 goto restart;
1838 1751
1839 return 0; 1752 return 0;
1840} 1753}
1841 1754
1842static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi)
1843{
1844 if (gi->gl)
1845 gfs2_glock_put(gi->gl);
1846 gi->gl = NULL;
1847}
1848
1849static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) 1755static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
1850{ 1756{
1851 struct gfs2_glock_iter *gi = seq->private; 1757 struct gfs2_glock_iter *gi = seq->private;
1852 loff_t n = *pos; 1758 loff_t n = *pos;
1853 1759
1854 gi->hash = 0; 1760 gi->hash = 0;
1761 rcu_read_lock();
1855 1762
1856 do { 1763 do {
1857 if (gfs2_glock_iter_next(gi)) { 1764 if (gfs2_glock_iter_next(gi))
1858 gfs2_glock_iter_free(gi);
1859 return NULL; 1765 return NULL;
1860 }
1861 } while (n--); 1766 } while (n--);
1862 1767
1863 return gi->gl; 1768 return gi->gl;
@@ -1870,10 +1775,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1870 1775
1871 (*pos)++; 1776 (*pos)++;
1872 1777
1873 if (gfs2_glock_iter_next(gi)) { 1778 if (gfs2_glock_iter_next(gi))
1874 gfs2_glock_iter_free(gi);
1875 return NULL; 1779 return NULL;
1876 }
1877 1780
1878 return gi->gl; 1781 return gi->gl;
1879} 1782}
@@ -1881,7 +1784,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
1881static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) 1784static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
1882{ 1785{
1883 struct gfs2_glock_iter *gi = seq->private; 1786 struct gfs2_glock_iter *gi = seq->private;
1884 gfs2_glock_iter_free(gi); 1787
1788 if (gi->gl)
1789 rcu_read_unlock();
1790 gi->gl = NULL;
1885} 1791}
1886 1792
1887static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) 1793static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 691851ceb615..aea160690e94 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -118,7 +118,7 @@ struct lm_lockops {
118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
119 void (*lm_unmount) (struct gfs2_sbd *sdp); 119 void (*lm_unmount) (struct gfs2_sbd *sdp);
120 void (*lm_withdraw) (struct gfs2_sbd *sdp); 120 void (*lm_withdraw) (struct gfs2_sbd *sdp);
121 void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); 121 void (*lm_put_lock) (struct gfs2_glock *gl);
122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, 122 int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state,
123 unsigned int flags); 123 unsigned int flags);
124 void (*lm_cancel) (struct gfs2_glock *gl); 124 void (*lm_cancel) (struct gfs2_glock *gl);
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp,
174 int create, struct gfs2_glock **glp); 174 int create, struct gfs2_glock **glp);
175void gfs2_glock_hold(struct gfs2_glock *gl); 175void gfs2_glock_hold(struct gfs2_glock *gl);
176void gfs2_glock_put_nolock(struct gfs2_glock *gl); 176void gfs2_glock_put_nolock(struct gfs2_glock *gl);
177int gfs2_glock_put(struct gfs2_glock *gl); 177void gfs2_glock_put(struct gfs2_glock *gl);
178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, 178void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
179 struct gfs2_holder *gh); 179 struct gfs2_holder *gh);
180void gfs2_holder_reinit(unsigned int state, unsigned flags, 180void gfs2_holder_reinit(unsigned int state, unsigned flags,
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl,
223 return error; 223 return error;
224} 224}
225 225
226/* Lock Value Block functions */ 226extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state);
227 227extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret);
228int gfs2_lvb_hold(struct gfs2_glock *gl); 228extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp);
229void gfs2_lvb_unhold(struct gfs2_glock *gl); 229extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp);
230 230extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip);
231void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); 231extern void gfs2_glock_thaw(struct gfs2_sbd *sdp);
232void gfs2_glock_complete(struct gfs2_glock *gl, int ret); 232extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl);
233void gfs2_reclaim_glock(struct gfs2_sbd *sdp); 233extern void gfs2_glock_free(struct gfs2_glock *gl);
234void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); 234
235void gfs2_glock_finish_truncate(struct gfs2_inode *ip); 235extern int __init gfs2_glock_init(void);
236void gfs2_glock_thaw(struct gfs2_sbd *sdp); 236extern void gfs2_glock_exit(void);
237 237
238int __init gfs2_glock_init(void); 238extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp);
239void gfs2_glock_exit(void); 239extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
240 240extern int gfs2_register_debugfs(void);
241int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); 241extern void gfs2_unregister_debugfs(void);
242void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
243int gfs2_register_debugfs(void);
244void gfs2_unregister_debugfs(void);
245 242
246extern const struct lm_lockops gfs2_dlm_ops; 243extern const struct lm_lockops gfs2_dlm_ops;
247 244
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 263561bf1a50..3754e3cbf02b 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
56 BUG_ON(current->journal_info); 56 BUG_ON(current->journal_info);
57 current->journal_info = &tr; 57 current->journal_info = &tr;
58 58
59 gfs2_log_lock(sdp); 59 spin_lock(&sdp->sd_ail_lock);
60 while (!list_empty(head)) { 60 while (!list_empty(head)) {
61 bd = list_entry(head->next, struct gfs2_bufdata, 61 bd = list_entry(head->next, struct gfs2_bufdata,
62 bd_ail_gl_list); 62 bd_ail_gl_list);
63 bh = bd->bd_bh; 63 bh = bd->bd_bh;
64 gfs2_remove_from_ail(bd); 64 gfs2_remove_from_ail(bd);
65 spin_unlock(&sdp->sd_ail_lock);
66
65 bd->bd_bh = NULL; 67 bd->bd_bh = NULL;
66 bh->b_private = NULL; 68 bh->b_private = NULL;
67 bd->bd_blkno = bh->b_blocknr; 69 bd->bd_blkno = bh->b_blocknr;
70 gfs2_log_lock(sdp);
68 gfs2_assert_withdraw(sdp, !buffer_busy(bh)); 71 gfs2_assert_withdraw(sdp, !buffer_busy(bh));
69 gfs2_trans_add_revoke(sdp, bd); 72 gfs2_trans_add_revoke(sdp, bd);
73 gfs2_log_unlock(sdp);
74
75 spin_lock(&sdp->sd_ail_lock);
70 } 76 }
71 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); 77 gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
72 gfs2_log_unlock(sdp); 78 spin_unlock(&sdp->sd_ail_lock);
73 79
74 gfs2_trans_end(sdp); 80 gfs2_trans_end(sdp);
75 gfs2_log_flush(sdp, NULL); 81 gfs2_log_flush(sdp, NULL);
@@ -206,8 +212,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
206static int inode_go_demote_ok(const struct gfs2_glock *gl) 212static int inode_go_demote_ok(const struct gfs2_glock *gl)
207{ 213{
208 struct gfs2_sbd *sdp = gl->gl_sbd; 214 struct gfs2_sbd *sdp = gl->gl_sbd;
215 struct gfs2_holder *gh;
216
209 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) 217 if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
210 return 0; 218 return 0;
219
220 if (!list_empty(&gl->gl_holders)) {
221 gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
222 if (gh->gh_list.next != &gl->gl_holders)
223 return 0;
224 }
225
211 return 1; 226 return 1;
212} 227}
213 228
@@ -272,19 +287,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
272} 287}
273 288
274/** 289/**
275 * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
276 * @gl: the glock
277 *
278 * Returns: 1 if it's ok
279 */
280
281static int rgrp_go_demote_ok(const struct gfs2_glock *gl)
282{
283 const struct address_space *mapping = (const struct address_space *)(gl + 1);
284 return !mapping->nrpages;
285}
286
287/**
288 * rgrp_go_lock - operation done after an rgrp lock is locked by 290 * rgrp_go_lock - operation done after an rgrp lock is locked by
289 * a first holder on this node. 291 * a first holder on this node.
290 * @gl: the glock 292 * @gl: the glock
@@ -410,7 +412,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
410const struct gfs2_glock_operations gfs2_rgrp_glops = { 412const struct gfs2_glock_operations gfs2_rgrp_glops = {
411 .go_xmote_th = rgrp_go_sync, 413 .go_xmote_th = rgrp_go_sync,
412 .go_inval = rgrp_go_inval, 414 .go_inval = rgrp_go_inval,
413 .go_demote_ok = rgrp_go_demote_ok,
414 .go_lock = rgrp_go_lock, 415 .go_lock = rgrp_go_lock,
415 .go_unlock = rgrp_go_unlock, 416 .go_unlock = rgrp_go_unlock,
416 .go_dump = gfs2_rgrp_dump, 417 .go_dump = gfs2_rgrp_dump,
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index a79790c06275..870a89d6d4dc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -15,6 +15,8 @@
15#include <linux/workqueue.h> 15#include <linux/workqueue.h>
16#include <linux/dlm.h> 16#include <linux/dlm.h>
17#include <linux/buffer_head.h> 17#include <linux/buffer_head.h>
18#include <linux/rcupdate.h>
19#include <linux/rculist_bl.h>
18 20
19#define DIO_WAIT 0x00000010 21#define DIO_WAIT 0x00000010
20#define DIO_METADATA 0x00000020 22#define DIO_METADATA 0x00000020
@@ -201,7 +203,7 @@ enum {
201}; 203};
202 204
203struct gfs2_glock { 205struct gfs2_glock {
204 struct hlist_node gl_list; 206 struct hlist_bl_node gl_list;
205 unsigned long gl_flags; /* GLF_... */ 207 unsigned long gl_flags; /* GLF_... */
206 struct lm_lockname gl_name; 208 struct lm_lockname gl_name;
207 atomic_t gl_ref; 209 atomic_t gl_ref;
@@ -234,6 +236,7 @@ struct gfs2_glock {
234 atomic_t gl_ail_count; 236 atomic_t gl_ail_count;
235 struct delayed_work gl_work; 237 struct delayed_work gl_work;
236 struct work_struct gl_delete; 238 struct work_struct gl_delete;
239 struct rcu_head gl_rcu;
237}; 240};
238 241
239#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ 242#define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */
@@ -314,6 +317,7 @@ enum {
314 QDF_USER = 0, 317 QDF_USER = 0,
315 QDF_CHANGE = 1, 318 QDF_CHANGE = 1,
316 QDF_LOCKED = 2, 319 QDF_LOCKED = 2,
320 QDF_REFRESH = 3,
317}; 321};
318 322
319struct gfs2_quota_data { 323struct gfs2_quota_data {
@@ -647,6 +651,7 @@ struct gfs2_sbd {
647 unsigned int sd_log_flush_head; 651 unsigned int sd_log_flush_head;
648 u64 sd_log_flush_wrapped; 652 u64 sd_log_flush_wrapped;
649 653
654 spinlock_t sd_ail_lock;
650 struct list_head sd_ail1_list; 655 struct list_head sd_ail1_list;
651 struct list_head sd_ail2_list; 656 struct list_head sd_ail2_list;
652 u64 sd_ail_sync_gen; 657 u64 sd_ail_sync_gen;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 7aa7d4f8984a..97d54a28776a 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -763,14 +763,15 @@ fail:
763 return error; 763 return error;
764} 764}
765 765
766static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip) 766static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
767 const struct qstr *qstr)
767{ 768{
768 int err; 769 int err;
769 size_t len; 770 size_t len;
770 void *value; 771 void *value;
771 char *name; 772 char *name;
772 773
773 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 774 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
774 &name, &value, &len); 775 &name, &value, &len);
775 776
776 if (err) { 777 if (err) {
@@ -854,7 +855,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
854 if (error) 855 if (error)
855 goto fail_gunlock2; 856 goto fail_gunlock2;
856 857
857 error = gfs2_security_init(dip, GFS2_I(inode)); 858 error = gfs2_security_init(dip, GFS2_I(inode), name);
858 if (error) 859 if (error)
859 goto fail_gunlock2; 860 goto fail_gunlock2;
860 861
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 6e493aee28f8..98c80d8c2a62 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg)
22{ 22{
23 struct gfs2_glock *gl = arg; 23 struct gfs2_glock *gl = arg;
24 unsigned ret = gl->gl_state; 24 unsigned ret = gl->gl_state;
25 struct gfs2_sbd *sdp = gl->gl_sbd;
26 25
27 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); 26 BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
28 27
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg)
31 30
32 switch (gl->gl_lksb.sb_status) { 31 switch (gl->gl_lksb.sb_status) {
33 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ 32 case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
34 if (gl->gl_ops->go_flags & GLOF_ASPACE) 33 gfs2_glock_free(gl);
35 kmem_cache_free(gfs2_glock_aspace_cachep, gl);
36 else
37 kmem_cache_free(gfs2_glock_cachep, gl);
38 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
39 wake_up(&sdp->sd_glock_wait);
40 return; 34 return;
41 case -DLM_ECANCEL: /* Cancel while getting lock */ 35 case -DLM_ECANCEL: /* Cancel while getting lock */
42 ret |= LM_OUT_CANCELED; 36 ret |= LM_OUT_CANCELED;
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
164 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); 158 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
165} 159}
166 160
167static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) 161static void gdlm_put_lock(struct gfs2_glock *gl)
168{ 162{
169 struct gfs2_sbd *sdp = gl->gl_sbd; 163 struct gfs2_sbd *sdp = gl->gl_sbd;
170 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 164 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
171 int error; 165 int error;
172 166
173 if (gl->gl_lksb.sb_lkid == 0) { 167 if (gl->gl_lksb.sb_lkid == 0) {
174 kmem_cache_free(cachep, gl); 168 gfs2_glock_free(gl);
175 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
176 wake_up(&sdp->sd_glock_wait);
177 return; 169 return;
178 } 170 }
179 171
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index eb01f3575e10..e7ed31f858dd 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,
67 * @mapping: The associated mapping (maybe NULL) 67 * @mapping: The associated mapping (maybe NULL)
68 * @bd: The gfs2_bufdata to remove 68 * @bd: The gfs2_bufdata to remove
69 * 69 *
70 * The log lock _must_ be held when calling this function 70 * The ail lock _must_ be held when calling this function
71 * 71 *
72 */ 72 */
73 73
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
88 */ 88 */
89 89
90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 90static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
91__releases(&sdp->sd_log_lock) 91__releases(&sdp->sd_ail_lock)
92__acquires(&sdp->sd_log_lock) 92__acquires(&sdp->sd_ail_lock)
93{ 93{
94 struct gfs2_bufdata *bd, *s; 94 struct gfs2_bufdata *bd, *s;
95 struct buffer_head *bh; 95 struct buffer_head *bh;
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock)
117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); 117 list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list);
118 118
119 get_bh(bh); 119 get_bh(bh);
120 gfs2_log_unlock(sdp); 120 spin_unlock(&sdp->sd_ail_lock);
121 lock_buffer(bh); 121 lock_buffer(bh);
122 if (test_clear_buffer_dirty(bh)) { 122 if (test_clear_buffer_dirty(bh)) {
123 bh->b_end_io = end_buffer_write_sync; 123 bh->b_end_io = end_buffer_write_sync;
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock)
126 unlock_buffer(bh); 126 unlock_buffer(bh);
127 brelse(bh); 127 brelse(bh);
128 } 128 }
129 gfs2_log_lock(sdp); 129 spin_lock(&sdp->sd_ail_lock);
130 130
131 retry = 1; 131 retry = 1;
132 break; 132 break;
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
175 struct gfs2_ail *ai; 175 struct gfs2_ail *ai;
176 int done = 0; 176 int done = 0;
177 177
178 gfs2_log_lock(sdp); 178 spin_lock(&sdp->sd_ail_lock);
179 head = &sdp->sd_ail1_list; 179 head = &sdp->sd_ail1_list;
180 if (list_empty(head)) { 180 if (list_empty(head)) {
181 gfs2_log_unlock(sdp); 181 spin_unlock(&sdp->sd_ail_lock);
182 return; 182 return;
183 } 183 }
184 sync_gen = sdp->sd_ail_sync_gen++; 184 sync_gen = sdp->sd_ail_sync_gen++;
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
189 if (ai->ai_sync_gen >= sync_gen) 189 if (ai->ai_sync_gen >= sync_gen)
190 continue; 190 continue;
191 ai->ai_sync_gen = sync_gen; 191 ai->ai_sync_gen = sync_gen;
192 gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */ 192 gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */
193 done = 0; 193 done = 0;
194 break; 194 break;
195 } 195 }
196 } 196 }
197 197
198 gfs2_log_unlock(sdp); 198 spin_unlock(&sdp->sd_ail_lock);
199} 199}
200 200
201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) 201static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
203 struct gfs2_ail *ai, *s; 203 struct gfs2_ail *ai, *s;
204 int ret; 204 int ret;
205 205
206 gfs2_log_lock(sdp); 206 spin_lock(&sdp->sd_ail_lock);
207 207
208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { 208 list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) {
209 if (gfs2_ail1_empty_one(sdp, ai, flags)) 209 if (gfs2_ail1_empty_one(sdp, ai, flags))
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)
214 214
215 ret = list_empty(&sdp->sd_ail1_list); 215 ret = list_empty(&sdp->sd_ail1_list);
216 216
217 gfs2_log_unlock(sdp); 217 spin_unlock(&sdp->sd_ail_lock);
218 218
219 return ret; 219 return ret;
220} 220}
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
247 int wrap = (new_tail < old_tail); 247 int wrap = (new_tail < old_tail);
248 int a, b, rm; 248 int a, b, rm;
249 249
250 gfs2_log_lock(sdp); 250 spin_lock(&sdp->sd_ail_lock);
251 251
252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { 252 list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) {
253 a = (old_tail <= ai->ai_first); 253 a = (old_tail <= ai->ai_first);
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
263 kfree(ai); 263 kfree(ai);
264 } 264 }
265 265
266 gfs2_log_unlock(sdp); 266 spin_unlock(&sdp->sd_ail_lock);
267} 267}
268 268
269/** 269/**
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
421 struct gfs2_ail *ai; 421 struct gfs2_ail *ai;
422 unsigned int tail; 422 unsigned int tail;
423 423
424 gfs2_log_lock(sdp); 424 spin_lock(&sdp->sd_ail_lock);
425 425
426 if (list_empty(&sdp->sd_ail1_list)) { 426 if (list_empty(&sdp->sd_ail1_list)) {
427 tail = sdp->sd_log_head; 427 tail = sdp->sd_log_head;
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
430 tail = ai->ai_first; 430 tail = ai->ai_first;
431 } 431 }
432 432
433 gfs2_log_unlock(sdp); 433 spin_unlock(&sdp->sd_ail_lock);
434 434
435 return tail; 435 return tail;
436} 436}
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
743 sdp->sd_log_commited_databuf = 0; 743 sdp->sd_log_commited_databuf = 0;
744 sdp->sd_log_commited_revoke = 0; 744 sdp->sd_log_commited_revoke = 0;
745 745
746 spin_lock(&sdp->sd_ail_lock);
746 if (!list_empty(&ai->ai_ail1_list)) { 747 if (!list_empty(&ai->ai_ail1_list)) {
747 list_add(&ai->ai_list, &sdp->sd_ail1_list); 748 list_add(&ai->ai_list, &sdp->sd_ail1_list);
748 ai = NULL; 749 ai = NULL;
749 } 750 }
751 spin_unlock(&sdp->sd_ail_lock);
750 gfs2_log_unlock(sdp); 752 gfs2_log_unlock(sdp);
751 trace_gfs2_log_flush(sdp, 0); 753 trace_gfs2_log_flush(sdp, 0);
752 up_write(&sdp->sd_log_flush_lock); 754 up_write(&sdp->sd_log_flush_lock);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index bf33f822058d..e919abf25ecd 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
51 /* If this buffer is in the AIL and it has already been written 51 /* If this buffer is in the AIL and it has already been written
52 * to in-place disk block, remove it from the AIL. 52 * to in-place disk block, remove it from the AIL.
53 */ 53 */
54 spin_lock(&sdp->sd_ail_lock);
54 if (bd->bd_ail) 55 if (bd->bd_ail)
55 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); 56 list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
57 spin_unlock(&sdp->sd_ail_lock);
56 get_bh(bh); 58 get_bh(bh);
57 atomic_inc(&sdp->sd_log_pinned); 59 atomic_inc(&sdp->sd_log_pinned);
58 trace_gfs2_pin(bd, 1); 60 trace_gfs2_pin(bd, 1);
@@ -80,7 +82,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
80 mark_buffer_dirty(bh); 82 mark_buffer_dirty(bh);
81 clear_buffer_pinned(bh); 83 clear_buffer_pinned(bh);
82 84
83 gfs2_log_lock(sdp); 85 spin_lock(&sdp->sd_ail_lock);
84 if (bd->bd_ail) { 86 if (bd->bd_ail) {
85 list_del(&bd->bd_ail_st_list); 87 list_del(&bd->bd_ail_st_list);
86 brelse(bh); 88 brelse(bh);
@@ -91,9 +93,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
91 } 93 }
92 bd->bd_ail = ai; 94 bd->bd_ail = ai;
93 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); 95 list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
94 clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); 96 spin_unlock(&sdp->sd_ail_lock);
97
98 if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags))
99 gfs2_glock_schedule_for_reclaim(bd->bd_gl);
95 trace_gfs2_pin(bd, 0); 100 trace_gfs2_pin(bd, 0);
96 gfs2_log_unlock(sdp);
97 unlock_buffer(bh); 101 unlock_buffer(bh);
98 atomic_dec(&sdp->sd_log_pinned); 102 atomic_dec(&sdp->sd_log_pinned);
99} 103}
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 72c31a315d96..888a5f5a1a58 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -14,6 +14,8 @@
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/init.h> 15#include <linux/init.h>
16#include <linux/gfs2_ondisk.h> 16#include <linux/gfs2_ondisk.h>
17#include <linux/rcupdate.h>
18#include <linux/rculist_bl.h>
17#include <asm/atomic.h> 19#include <asm/atomic.h>
18 20
19#include "gfs2.h" 21#include "gfs2.h"
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo)
45{ 47{
46 struct gfs2_glock *gl = foo; 48 struct gfs2_glock *gl = foo;
47 49
48 INIT_HLIST_NODE(&gl->gl_list); 50 INIT_HLIST_BL_NODE(&gl->gl_list);
49 spin_lock_init(&gl->gl_spin); 51 spin_lock_init(&gl->gl_spin);
50 INIT_LIST_HEAD(&gl->gl_holders); 52 INIT_LIST_HEAD(&gl->gl_holders);
51 INIT_LIST_HEAD(&gl->gl_lru); 53 INIT_LIST_HEAD(&gl->gl_lru);
@@ -191,6 +193,8 @@ static void __exit exit_gfs2_fs(void)
191 unregister_filesystem(&gfs2meta_fs_type); 193 unregister_filesystem(&gfs2meta_fs_type);
192 destroy_workqueue(gfs_recovery_wq); 194 destroy_workqueue(gfs_recovery_wq);
193 195
196 rcu_barrier();
197
194 kmem_cache_destroy(gfs2_quotad_cachep); 198 kmem_cache_destroy(gfs2_quotad_cachep);
195 kmem_cache_destroy(gfs2_rgrpd_cachep); 199 kmem_cache_destroy(gfs2_rgrpd_cachep);
196 kmem_cache_destroy(gfs2_bufdata_cachep); 200 kmem_cache_destroy(gfs2_bufdata_cachep);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 939739c7b3f9..01d97f486553 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
326 brelse(bh); 326 brelse(bh);
327 } 327 }
328 if (bd) { 328 if (bd) {
329 spin_lock(&sdp->sd_ail_lock);
329 if (bd->bd_ail) { 330 if (bd->bd_ail) {
330 gfs2_remove_from_ail(bd); 331 gfs2_remove_from_ail(bd);
331 bh->b_private = NULL; 332 bh->b_private = NULL;
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int
333 bd->bd_blkno = bh->b_blocknr; 334 bd->bd_blkno = bh->b_blocknr;
334 gfs2_trans_add_revoke(sdp, bd); 335 gfs2_trans_add_revoke(sdp, bd);
335 } 336 }
337 spin_unlock(&sdp->sd_ail_lock);
336 } 338 }
337 clear_buffer_dirty(bh); 339 clear_buffer_dirty(bh);
338 clear_buffer_uptodate(bh); 340 clear_buffer_uptodate(bh);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 777927ce6f79..42ef24355afb 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
99 99
100 init_waitqueue_head(&sdp->sd_log_waitq); 100 init_waitqueue_head(&sdp->sd_log_waitq);
101 init_waitqueue_head(&sdp->sd_logd_waitq); 101 init_waitqueue_head(&sdp->sd_logd_waitq);
102 spin_lock_init(&sdp->sd_ail_lock);
102 INIT_LIST_HEAD(&sdp->sd_ail1_list); 103 INIT_LIST_HEAD(&sdp->sd_ail1_list);
103 INIT_LIST_HEAD(&sdp->sd_ail2_list); 104 INIT_LIST_HEAD(&sdp->sd_ail2_list);
104 105
@@ -928,17 +929,9 @@ static const match_table_t nolock_tokens = {
928 { Opt_err, NULL }, 929 { Opt_err, NULL },
929}; 930};
930 931
931static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
932{
933 struct gfs2_sbd *sdp = gl->gl_sbd;
934 kmem_cache_free(cachep, gl);
935 if (atomic_dec_and_test(&sdp->sd_glock_disposal))
936 wake_up(&sdp->sd_glock_wait);
937}
938
939static const struct lm_lockops nolock_ops = { 932static const struct lm_lockops nolock_ops = {
940 .lm_proto_name = "lock_nolock", 933 .lm_proto_name = "lock_nolock",
941 .lm_put_lock = nolock_put_lock, 934 .lm_put_lock = gfs2_glock_free,
942 .lm_tokens = &nolock_tokens, 935 .lm_tokens = &nolock_tokens,
943}; 936};
944 937
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index d8b26ac2e20b..09e436a50723 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1026 1026
1027/** 1027/**
1028 * gfs2_permission - 1028 * gfs2_permission -
1029 * @inode: 1029 * @inode: The inode
1030 * @mask: 1030 * @mask: The mask to be tested
1031 * @nd: passed from Linux VFS, ignored by us 1031 * @flags: Indicates whether this is an RCU path walk or not
1032 * 1032 *
1033 * This may be called from the VFS directly, or from within GFS2 with the 1033 * This may be called from the VFS directly, or from within GFS2 with the
1034 * inode locked, so we look to see if the glock is already locked and only 1034 * inode locked, so we look to see if the glock is already locked and only
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1044 int error; 1044 int error;
1045 int unlock = 0; 1045 int unlock = 0;
1046 1046
1047 if (flags & IPERM_FLAG_RCU)
1048 return -ECHILD;
1049 1047
1050 ip = GFS2_I(inode); 1048 ip = GFS2_I(inode);
1051 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1049 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1050 if (flags & IPERM_FLAG_RCU)
1051 return -ECHILD;
1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1052 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1053 if (error) 1053 if (error)
1054 return error; 1054 return error;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a689901963de..e23d9864c418 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
834 goto out_end_trans; 834 goto out_end_trans;
835 835
836 do_qc(qd, -qd->qd_change_sync); 836 do_qc(qd, -qd->qd_change_sync);
837 set_bit(QDF_REFRESH, &qd->qd_flags);
837 } 838 }
838 839
839 error = 0; 840 error = 0;
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
929{ 930{
930 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 931 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
931 struct gfs2_alloc *al = ip->i_alloc; 932 struct gfs2_alloc *al = ip->i_alloc;
933 struct gfs2_quota_data *qd;
932 unsigned int x; 934 unsigned int x;
933 int error = 0; 935 int error = 0;
934 936
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
942 sort_qd, NULL); 944 sort_qd, NULL);
943 945
944 for (x = 0; x < al->al_qd_num; x++) { 946 for (x = 0; x < al->al_qd_num; x++) {
945 error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]); 947 int force = NO_FORCE;
948 qd = al->al_qd[x];
949 if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags))
950 force = FORCE;
951 error = do_glock(qd, force, &al->al_qd_ghs[x]);
946 if (error) 952 if (error)
947 break; 953 break;
948 } 954 }
@@ -1587,6 +1593,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1587 1593
1588 offset = qd2offset(qd); 1594 offset = qd2offset(qd);
1589 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); 1595 alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota));
1596 if (gfs2_is_stuffed(ip))
1597 alloc_required = 1;
1590 if (alloc_required) { 1598 if (alloc_required) {
1591 al = gfs2_alloc_get(ip); 1599 al = gfs2_alloc_get(ip);
1592 if (al == NULL) 1600 if (al == NULL)
@@ -1600,7 +1608,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
1600 blocks += gfs2_rg_blocks(al); 1608 blocks += gfs2_rg_blocks(al);
1601 } 1609 }
1602 1610
1603 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); 1611 /* Some quotas span block boundaries and can update two blocks,
1612 adding an extra block to the transaction to handle such quotas */
1613 error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0);
1604 if (error) 1614 if (error)
1605 goto out_release; 1615 goto out_release;
1606 1616
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7293ea27020c..cf930cd9664a 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1602,7 +1602,7 @@ rgrp_error:
1602 * 1602 *
1603 */ 1603 */
1604 1604
1605void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) 1605void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1606{ 1606{
1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1607 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1608 struct gfs2_rgrpd *rgd; 1608 struct gfs2_rgrpd *rgd;
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1617 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1618 1618
1619 gfs2_trans_add_rg(rgd); 1619 gfs2_trans_add_rg(rgd);
1620}
1620 1621
1622/**
1623 * gfs2_free_data - free a contiguous run of data block(s)
1624 * @ip: the inode these blocks are being freed from
1625 * @bstart: first block of a run of contiguous blocks
1626 * @blen: the length of the block run
1627 *
1628 */
1629
1630void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1631{
1632 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1633
1634 __gfs2_free_data(ip, bstart, blen);
1621 gfs2_statfs_change(sdp, 0, +blen, 0); 1635 gfs2_statfs_change(sdp, 0, +blen, 0);
1622 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1636 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1623} 1637}
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1630 * 1644 *
1631 */ 1645 */
1632 1646
1633void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) 1647void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1634{ 1648{
1635 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1649 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1636 struct gfs2_rgrpd *rgd; 1650 struct gfs2_rgrpd *rgd;
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1645 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1659 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1646 1660
1647 gfs2_trans_add_rg(rgd); 1661 gfs2_trans_add_rg(rgd);
1662 gfs2_meta_wipe(ip, bstart, blen);
1663}
1648 1664
1665/**
1666 * gfs2_free_meta - free a contiguous run of data block(s)
1667 * @ip: the inode these blocks are being freed from
1668 * @bstart: first block of a run of contiguous blocks
1669 * @blen: the length of the block run
1670 *
1671 */
1672
1673void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1674{
1675 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1676
1677 __gfs2_free_meta(ip, bstart, blen);
1649 gfs2_statfs_change(sdp, 0, +blen, 0); 1678 gfs2_statfs_change(sdp, 0, +blen, 0);
1650 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1679 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1651 gfs2_meta_wipe(ip, bstart, blen);
1652} 1680}
1653 1681
1654void gfs2_unlink_di(struct inode *inode) 1682void gfs2_unlink_di(struct inode *inode)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 50c2bb04369c..a80e3034ac47 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
54 54
55extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
55extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 56extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
56extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 58extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 59extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
58extern void gfs2_unlink_di(struct inode *inode); 60extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index afa66aaa2237..b4d70b13be92 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -238,46 +238,22 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
238} 238}
239 239
240/* 240/*
241 * hfs_unlink() 241 * hfs_remove()
242 * 242 *
243 * This is the unlink() entry in the inode_operations structure for 243 * This serves as both unlink() and rmdir() in the inode_operations
244 * regular HFS directories. The purpose is to delete an existing 244 * structure for regular HFS directories. The purpose is to delete
245 * file, given the inode for the parent directory and the name 245 * an existing child, given the inode for the parent directory and
246 * (and its length) of the existing file. 246 * the name (and its length) of the existing directory.
247 */
248static int hfs_unlink(struct inode *dir, struct dentry *dentry)
249{
250 struct inode *inode;
251 int res;
252
253 inode = dentry->d_inode;
254 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
255 if (res)
256 return res;
257
258 drop_nlink(inode);
259 hfs_delete_inode(inode);
260 inode->i_ctime = CURRENT_TIME_SEC;
261 mark_inode_dirty(inode);
262
263 return res;
264}
265
266/*
267 * hfs_rmdir()
268 * 247 *
269 * This is the rmdir() entry in the inode_operations structure for 248 * HFS does not have hardlinks, so both rmdir and unlink set the
270 * regular HFS directories. The purpose is to delete an existing 249 * link count to 0. The only difference is the emptiness check.
271 * directory, given the inode for the parent directory and the name
272 * (and its length) of the existing directory.
273 */ 250 */
274static int hfs_rmdir(struct inode *dir, struct dentry *dentry) 251static int hfs_remove(struct inode *dir, struct dentry *dentry)
275{ 252{
276 struct inode *inode; 253 struct inode *inode = dentry->d_inode;
277 int res; 254 int res;
278 255
279 inode = dentry->d_inode; 256 if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
280 if (inode->i_size != 2)
281 return -ENOTEMPTY; 257 return -ENOTEMPTY;
282 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); 258 res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name);
283 if (res) 259 if (res)
@@ -307,7 +283,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
307 283
308 /* Unlink destination if it already exists */ 284 /* Unlink destination if it already exists */
309 if (new_dentry->d_inode) { 285 if (new_dentry->d_inode) {
310 res = hfs_unlink(new_dir, new_dentry); 286 res = hfs_remove(new_dir, new_dentry);
311 if (res) 287 if (res)
312 return res; 288 return res;
313 } 289 }
@@ -332,9 +308,9 @@ const struct file_operations hfs_dir_operations = {
332const struct inode_operations hfs_dir_inode_operations = { 308const struct inode_operations hfs_dir_inode_operations = {
333 .create = hfs_create, 309 .create = hfs_create,
334 .lookup = hfs_lookup, 310 .lookup = hfs_lookup,
335 .unlink = hfs_unlink, 311 .unlink = hfs_remove,
336 .mkdir = hfs_mkdir, 312 .mkdir = hfs_mkdir,
337 .rmdir = hfs_rmdir, 313 .rmdir = hfs_remove,
338 .rename = hfs_rename, 314 .rename = hfs_rename,
339 .setattr = hfs_inode_setattr, 315 .setattr = hfs_inode_setattr,
340}; 316};
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 508ce662ce12..fbaa6690c8e0 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -47,7 +47,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
47 if (err) 47 if (err)
48 goto out; 48 goto out;
49 49
50 if (!is_owner_or_cap(inode)) { 50 if (!inode_owner_or_capable(inode)) {
51 err = -EACCES; 51 err = -EACCES;
52 goto out_drop_write; 52 goto out_drop_write;
53 } 53 }
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig
index 63b6f5632318..0c39dc3ef7d7 100644
--- a/fs/hpfs/Kconfig
+++ b/fs/hpfs/Kconfig
@@ -1,7 +1,7 @@
1config HPFS_FS 1config HPFS_FS
2 tristate "OS/2 HPFS file system support" 2 tristate "OS/2 HPFS file system support"
3 depends on BLOCK 3 depends on BLOCK
4 depends on BKL # nontrivial to fix 4 depends on BROKEN || !PREEMPT
5 help 5 help
6 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS 6 OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS
7 is the file system used for organizing files on OS/2 hard disk 7 is the file system used for organizing files on OS/2 hard disk
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index d32f63a569f7..b3d7c0ddb609 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -6,16 +6,15 @@
6 * directory VFS functions 6 * directory VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
11#include "hpfs_fn.h" 10#include "hpfs_fn.h"
12 11
13static int hpfs_dir_release(struct inode *inode, struct file *filp) 12static int hpfs_dir_release(struct inode *inode, struct file *filp)
14{ 13{
15 lock_kernel(); 14 hpfs_lock(inode->i_sb);
16 hpfs_del_pos(inode, &filp->f_pos); 15 hpfs_del_pos(inode, &filp->f_pos);
17 /*hpfs_write_if_changed(inode);*/ 16 /*hpfs_write_if_changed(inode);*/
18 unlock_kernel(); 17 hpfs_unlock(inode->i_sb);
19 return 0; 18 return 0;
20} 19}
21 20
@@ -30,7 +29,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
30 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
31 struct super_block *s = i->i_sb; 30 struct super_block *s = i->i_sb;
32 31
33 lock_kernel(); 32 hpfs_lock(s);
34 33
35 /*printk("dir lseek\n");*/ 34 /*printk("dir lseek\n");*/
36 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; 35 if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok;
@@ -43,12 +42,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
43 } 42 }
44 mutex_unlock(&i->i_mutex); 43 mutex_unlock(&i->i_mutex);
45ok: 44ok:
46 unlock_kernel(); 45 hpfs_unlock(s);
47 return filp->f_pos = new_off; 46 return filp->f_pos = new_off;
48fail: 47fail:
49 mutex_unlock(&i->i_mutex); 48 mutex_unlock(&i->i_mutex);
50 /*printk("illegal lseek: %016llx\n", new_off);*/ 49 /*printk("illegal lseek: %016llx\n", new_off);*/
51 unlock_kernel(); 50 hpfs_unlock(s);
52 return -ESPIPE; 51 return -ESPIPE;
53} 52}
54 53
@@ -64,7 +63,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
64 int c1, c2 = 0; 63 int c1, c2 = 0;
65 int ret = 0; 64 int ret = 0;
66 65
67 lock_kernel(); 66 hpfs_lock(inode->i_sb);
68 67
69 if (hpfs_sb(inode->i_sb)->sb_chk) { 68 if (hpfs_sb(inode->i_sb)->sb_chk) {
70 if (hpfs_chk_sectors(inode->i_sb, inode->i_ino, 1, "dir_fnode")) { 69 if (hpfs_chk_sectors(inode->i_sb, inode->i_ino, 1, "dir_fnode")) {
@@ -167,7 +166,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
167 hpfs_brelse4(&qbh); 166 hpfs_brelse4(&qbh);
168 } 167 }
169out: 168out:
170 unlock_kernel(); 169 hpfs_unlock(inode->i_sb);
171 return ret; 170 return ret;
172} 171}
173 172
@@ -197,10 +196,10 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
197 struct inode *result = NULL; 196 struct inode *result = NULL;
198 struct hpfs_inode_info *hpfs_result; 197 struct hpfs_inode_info *hpfs_result;
199 198
200 lock_kernel(); 199 hpfs_lock(dir->i_sb);
201 if ((err = hpfs_chk_name(name, &len))) { 200 if ((err = hpfs_chk_name(name, &len))) {
202 if (err == -ENAMETOOLONG) { 201 if (err == -ENAMETOOLONG) {
203 unlock_kernel(); 202 hpfs_unlock(dir->i_sb);
204 return ERR_PTR(-ENAMETOOLONG); 203 return ERR_PTR(-ENAMETOOLONG);
205 } 204 }
206 goto end_add; 205 goto end_add;
@@ -298,7 +297,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
298 297
299 end: 298 end:
300 end_add: 299 end_add:
301 unlock_kernel(); 300 hpfs_unlock(dir->i_sb);
302 d_add(dentry, result); 301 d_add(dentry, result);
303 return NULL; 302 return NULL;
304 303
@@ -311,7 +310,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name
311 310
312 /*bail:*/ 311 /*bail:*/
313 312
314 unlock_kernel(); 313 hpfs_unlock(dir->i_sb);
315 return ERR_PTR(-ENOENT); 314 return ERR_PTR(-ENOENT);
316} 315}
317 316
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index c0340887c7ea..2dbae20450f8 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -6,16 +6,15 @@
6 * file VFS functions 6 * file VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
10#include "hpfs_fn.h" 9#include "hpfs_fn.h"
11 10
12#define BLOCKS(size) (((size) + 511) >> 9) 11#define BLOCKS(size) (((size) + 511) >> 9)
13 12
14static int hpfs_file_release(struct inode *inode, struct file *file) 13static int hpfs_file_release(struct inode *inode, struct file *file)
15{ 14{
16 lock_kernel(); 15 hpfs_lock(inode->i_sb);
17 hpfs_write_if_changed(inode); 16 hpfs_write_if_changed(inode);
18 unlock_kernel(); 17 hpfs_unlock(inode->i_sb);
19 return 0; 18 return 0;
20} 19}
21 20
@@ -49,14 +48,14 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno)
49static void hpfs_truncate(struct inode *i) 48static void hpfs_truncate(struct inode *i)
50{ 49{
51 if (IS_IMMUTABLE(i)) return /*-EPERM*/; 50 if (IS_IMMUTABLE(i)) return /*-EPERM*/;
52 lock_kernel(); 51 hpfs_lock(i->i_sb);
53 hpfs_i(i)->i_n_secs = 0; 52 hpfs_i(i)->i_n_secs = 0;
54 i->i_blocks = 1 + ((i->i_size + 511) >> 9); 53 i->i_blocks = 1 + ((i->i_size + 511) >> 9);
55 hpfs_i(i)->mmu_private = i->i_size; 54 hpfs_i(i)->mmu_private = i->i_size;
56 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); 55 hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9));
57 hpfs_write_inode(i); 56 hpfs_write_inode(i);
58 hpfs_i(i)->i_n_secs = 0; 57 hpfs_i(i)->i_n_secs = 0;
59 unlock_kernel(); 58 hpfs_unlock(i->i_sb);
60} 59}
61 60
62static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) 61static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1c43dbea55e8..c15adbca07ff 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -342,3 +342,25 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t)
342 extern struct timezone sys_tz; 342 extern struct timezone sys_tz;
343 return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; 343 return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift;
344} 344}
345
346/*
347 * Locking:
348 *
349 * hpfs_lock() is a leftover from the big kernel lock.
350 * Right now, these functions are empty and only left
351 * for documentation purposes. The file system no longer
352 * works on SMP systems, so the lock is not needed
353 * any more.
354 *
355 * If someone is interested in making it work again, this
356 * would be the place to start by adding a per-superblock
357 * mutex and fixing all the bugs and performance issues
358 * caused by that.
359 */
360static inline void hpfs_lock(struct super_block *s)
361{
362}
363
364static inline void hpfs_unlock(struct super_block *s)
365{
366}
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 1ae35baa539e..87f1f787e767 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -6,7 +6,6 @@
6 * inode VFS functions 6 * inode VFS functions
7 */ 7 */
8 8
9#include <linux/smp_lock.h>
10#include <linux/slab.h> 9#include <linux/slab.h>
11#include "hpfs_fn.h" 10#include "hpfs_fn.h"
12 11
@@ -267,7 +266,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
267 struct inode *inode = dentry->d_inode; 266 struct inode *inode = dentry->d_inode;
268 int error = -EINVAL; 267 int error = -EINVAL;
269 268
270 lock_kernel(); 269 hpfs_lock(inode->i_sb);
271 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) 270 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
272 goto out_unlock; 271 goto out_unlock;
273 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) 272 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
@@ -290,7 +289,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
290 hpfs_write_inode(inode); 289 hpfs_write_inode(inode);
291 290
292 out_unlock: 291 out_unlock:
293 unlock_kernel(); 292 hpfs_unlock(inode->i_sb);
294 return error; 293 return error;
295} 294}
296 295
@@ -307,8 +306,8 @@ void hpfs_evict_inode(struct inode *inode)
307 truncate_inode_pages(&inode->i_data, 0); 306 truncate_inode_pages(&inode->i_data, 0);
308 end_writeback(inode); 307 end_writeback(inode);
309 if (!inode->i_nlink) { 308 if (!inode->i_nlink) {
310 lock_kernel(); 309 hpfs_lock(inode->i_sb);
311 hpfs_remove_fnode(inode->i_sb, inode->i_ino); 310 hpfs_remove_fnode(inode->i_sb, inode->i_ino);
312 unlock_kernel(); 311 hpfs_unlock(inode->i_sb);
313 } 312 }
314} 313}
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index f4ad9e31ddc4..d5f8c8a19023 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -6,7 +6,6 @@
6 * adding & removing files & directories 6 * adding & removing files & directories
7 */ 7 */
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/smp_lock.h>
10#include "hpfs_fn.h" 9#include "hpfs_fn.h"
11 10
12static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 11static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
@@ -25,7 +24,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
25 struct hpfs_dirent dee; 24 struct hpfs_dirent dee;
26 int err; 25 int err;
27 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; 26 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
28 lock_kernel(); 27 hpfs_lock(dir->i_sb);
29 err = -ENOSPC; 28 err = -ENOSPC;
30 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 29 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
31 if (!fnode) 30 if (!fnode)
@@ -103,7 +102,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
103 } 102 }
104 d_instantiate(dentry, result); 103 d_instantiate(dentry, result);
105 mutex_unlock(&hpfs_i(dir)->i_mutex); 104 mutex_unlock(&hpfs_i(dir)->i_mutex);
106 unlock_kernel(); 105 hpfs_unlock(dir->i_sb);
107 return 0; 106 return 0;
108bail3: 107bail3:
109 mutex_unlock(&hpfs_i(dir)->i_mutex); 108 mutex_unlock(&hpfs_i(dir)->i_mutex);
@@ -115,7 +114,7 @@ bail1:
115 brelse(bh); 114 brelse(bh);
116 hpfs_free_sectors(dir->i_sb, fno, 1); 115 hpfs_free_sectors(dir->i_sb, fno, 1);
117bail: 116bail:
118 unlock_kernel(); 117 hpfs_unlock(dir->i_sb);
119 return err; 118 return err;
120} 119}
121 120
@@ -132,7 +131,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
132 int err; 131 int err;
133 if ((err = hpfs_chk_name(name, &len))) 132 if ((err = hpfs_chk_name(name, &len)))
134 return err==-ENOENT ? -EINVAL : err; 133 return err==-ENOENT ? -EINVAL : err;
135 lock_kernel(); 134 hpfs_lock(dir->i_sb);
136 err = -ENOSPC; 135 err = -ENOSPC;
137 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 136 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
138 if (!fnode) 137 if (!fnode)
@@ -195,7 +194,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc
195 } 194 }
196 d_instantiate(dentry, result); 195 d_instantiate(dentry, result);
197 mutex_unlock(&hpfs_i(dir)->i_mutex); 196 mutex_unlock(&hpfs_i(dir)->i_mutex);
198 unlock_kernel(); 197 hpfs_unlock(dir->i_sb);
199 return 0; 198 return 0;
200 199
201bail2: 200bail2:
@@ -205,7 +204,7 @@ bail1:
205 brelse(bh); 204 brelse(bh);
206 hpfs_free_sectors(dir->i_sb, fno, 1); 205 hpfs_free_sectors(dir->i_sb, fno, 1);
207bail: 206bail:
208 unlock_kernel(); 207 hpfs_unlock(dir->i_sb);
209 return err; 208 return err;
210} 209}
211 210
@@ -224,7 +223,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
224 if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM; 223 if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM;
225 if (!new_valid_dev(rdev)) 224 if (!new_valid_dev(rdev))
226 return -EINVAL; 225 return -EINVAL;
227 lock_kernel(); 226 hpfs_lock(dir->i_sb);
228 err = -ENOSPC; 227 err = -ENOSPC;
229 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); 228 fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh);
230 if (!fnode) 229 if (!fnode)
@@ -274,7 +273,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t
274 d_instantiate(dentry, result); 273 d_instantiate(dentry, result);
275 mutex_unlock(&hpfs_i(dir)->i_mutex); 274 mutex_unlock(&hpfs_i(dir)->i_mutex);
276 brelse(bh); 275 brelse(bh);
277 unlock_kernel(); 276 hpfs_unlock(dir->i_sb);
278 return 0; 277 return 0;
279bail2: 278bail2:
280 mutex_unlock(&hpfs_i(dir)->i_mutex); 279 mutex_unlock(&hpfs_i(dir)->i_mutex);
@@ -283,7 +282,7 @@ bail1:
283 brelse(bh); 282 brelse(bh);
284 hpfs_free_sectors(dir->i_sb, fno, 1); 283 hpfs_free_sectors(dir->i_sb, fno, 1);
285bail: 284bail:
286 unlock_kernel(); 285 hpfs_unlock(dir->i_sb);
287 return err; 286 return err;
288} 287}
289 288
@@ -299,9 +298,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
299 struct inode *result; 298 struct inode *result;
300 int err; 299 int err;
301 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; 300 if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err;
302 lock_kernel(); 301 hpfs_lock(dir->i_sb);
303 if (hpfs_sb(dir->i_sb)->sb_eas < 2) { 302 if (hpfs_sb(dir->i_sb)->sb_eas < 2) {
304 unlock_kernel(); 303 hpfs_unlock(dir->i_sb);
305 return -EPERM; 304 return -EPERM;
306 } 305 }
307 err = -ENOSPC; 306 err = -ENOSPC;
@@ -354,7 +353,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
354 hpfs_write_inode_nolock(result); 353 hpfs_write_inode_nolock(result);
355 d_instantiate(dentry, result); 354 d_instantiate(dentry, result);
356 mutex_unlock(&hpfs_i(dir)->i_mutex); 355 mutex_unlock(&hpfs_i(dir)->i_mutex);
357 unlock_kernel(); 356 hpfs_unlock(dir->i_sb);
358 return 0; 357 return 0;
359bail2: 358bail2:
360 mutex_unlock(&hpfs_i(dir)->i_mutex); 359 mutex_unlock(&hpfs_i(dir)->i_mutex);
@@ -363,7 +362,7 @@ bail1:
363 brelse(bh); 362 brelse(bh);
364 hpfs_free_sectors(dir->i_sb, fno, 1); 363 hpfs_free_sectors(dir->i_sb, fno, 1);
365bail: 364bail:
366 unlock_kernel(); 365 hpfs_unlock(dir->i_sb);
367 return err; 366 return err;
368} 367}
369 368
@@ -380,7 +379,7 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
380 int rep = 0; 379 int rep = 0;
381 int err; 380 int err;
382 381
383 lock_kernel(); 382 hpfs_lock(dir->i_sb);
384 hpfs_adjust_length(name, &len); 383 hpfs_adjust_length(name, &len);
385again: 384again:
386 mutex_lock(&hpfs_i(inode)->i_parent_mutex); 385 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
@@ -416,7 +415,7 @@ again:
416 dentry_unhash(dentry); 415 dentry_unhash(dentry);
417 if (!d_unhashed(dentry)) { 416 if (!d_unhashed(dentry)) {
418 dput(dentry); 417 dput(dentry);
419 unlock_kernel(); 418 hpfs_unlock(dir->i_sb);
420 return -ENOSPC; 419 return -ENOSPC;
421 } 420 }
422 if (generic_permission(inode, MAY_WRITE, 0, NULL) || 421 if (generic_permission(inode, MAY_WRITE, 0, NULL) ||
@@ -435,7 +434,7 @@ again:
435 if (!err) 434 if (!err)
436 goto again; 435 goto again;
437 } 436 }
438 unlock_kernel(); 437 hpfs_unlock(dir->i_sb);
439 return -ENOSPC; 438 return -ENOSPC;
440 default: 439 default:
441 drop_nlink(inode); 440 drop_nlink(inode);
@@ -448,7 +447,7 @@ out1:
448out: 447out:
449 mutex_unlock(&hpfs_i(dir)->i_mutex); 448 mutex_unlock(&hpfs_i(dir)->i_mutex);
450 mutex_unlock(&hpfs_i(inode)->i_parent_mutex); 449 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
451 unlock_kernel(); 450 hpfs_unlock(dir->i_sb);
452 return err; 451 return err;
453} 452}
454 453
@@ -466,7 +465,7 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
466 int r; 465 int r;
467 466
468 hpfs_adjust_length(name, &len); 467 hpfs_adjust_length(name, &len);
469 lock_kernel(); 468 hpfs_lock(dir->i_sb);
470 mutex_lock(&hpfs_i(inode)->i_parent_mutex); 469 mutex_lock(&hpfs_i(inode)->i_parent_mutex);
471 mutex_lock(&hpfs_i(dir)->i_mutex); 470 mutex_lock(&hpfs_i(dir)->i_mutex);
472 err = -ENOENT; 471 err = -ENOENT;
@@ -508,7 +507,7 @@ out1:
508out: 507out:
509 mutex_unlock(&hpfs_i(dir)->i_mutex); 508 mutex_unlock(&hpfs_i(dir)->i_mutex);
510 mutex_unlock(&hpfs_i(inode)->i_parent_mutex); 509 mutex_unlock(&hpfs_i(inode)->i_parent_mutex);
511 unlock_kernel(); 510 hpfs_unlock(dir->i_sb);
512 return err; 511 return err;
513} 512}
514 513
@@ -521,21 +520,21 @@ static int hpfs_symlink_readpage(struct file *file, struct page *page)
521 int err; 520 int err;
522 521
523 err = -EIO; 522 err = -EIO;
524 lock_kernel(); 523 hpfs_lock(i->i_sb);
525 if (!(fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) 524 if (!(fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh)))
526 goto fail; 525 goto fail;
527 err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE); 526 err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE);
528 brelse(bh); 527 brelse(bh);
529 if (err) 528 if (err)
530 goto fail; 529 goto fail;
531 unlock_kernel(); 530 hpfs_unlock(i->i_sb);
532 SetPageUptodate(page); 531 SetPageUptodate(page);
533 kunmap(page); 532 kunmap(page);
534 unlock_page(page); 533 unlock_page(page);
535 return 0; 534 return 0;
536 535
537fail: 536fail:
538 unlock_kernel(); 537 hpfs_unlock(i->i_sb);
539 SetPageError(page); 538 SetPageError(page);
540 kunmap(page); 539 kunmap(page);
541 unlock_page(page); 540 unlock_page(page);
@@ -567,7 +566,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
567 err = 0; 566 err = 0;
568 hpfs_adjust_length(old_name, &old_len); 567 hpfs_adjust_length(old_name, &old_len);
569 568
570 lock_kernel(); 569 hpfs_lock(i->i_sb);
571 /* order doesn't matter, due to VFS exclusion */ 570 /* order doesn't matter, due to VFS exclusion */
572 mutex_lock(&hpfs_i(i)->i_parent_mutex); 571 mutex_lock(&hpfs_i(i)->i_parent_mutex);
573 if (new_inode) 572 if (new_inode)
@@ -659,7 +658,7 @@ end1:
659 mutex_unlock(&hpfs_i(i)->i_parent_mutex); 658 mutex_unlock(&hpfs_i(i)->i_parent_mutex);
660 if (new_inode) 659 if (new_inode)
661 mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex); 660 mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex);
662 unlock_kernel(); 661 hpfs_unlock(i->i_sb);
663 return err; 662 return err;
664} 663}
665 664
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index b30426b1fc97..c89b40808587 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -13,7 +13,6 @@
13#include <linux/statfs.h> 13#include <linux/statfs.h>
14#include <linux/magic.h> 14#include <linux/magic.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/smp_lock.h>
17#include <linux/bitmap.h> 16#include <linux/bitmap.h>
18#include <linux/slab.h> 17#include <linux/slab.h>
19 18
@@ -103,15 +102,11 @@ static void hpfs_put_super(struct super_block *s)
103{ 102{
104 struct hpfs_sb_info *sbi = hpfs_sb(s); 103 struct hpfs_sb_info *sbi = hpfs_sb(s);
105 104
106 lock_kernel();
107
108 kfree(sbi->sb_cp_table); 105 kfree(sbi->sb_cp_table);
109 kfree(sbi->sb_bmp_dir); 106 kfree(sbi->sb_bmp_dir);
110 unmark_dirty(s); 107 unmark_dirty(s);
111 s->s_fs_info = NULL; 108 s->s_fs_info = NULL;
112 kfree(sbi); 109 kfree(sbi);
113
114 unlock_kernel();
115} 110}
116 111
117unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) 112unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -143,7 +138,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
143 struct super_block *s = dentry->d_sb; 138 struct super_block *s = dentry->d_sb;
144 struct hpfs_sb_info *sbi = hpfs_sb(s); 139 struct hpfs_sb_info *sbi = hpfs_sb(s);
145 u64 id = huge_encode_dev(s->s_bdev->bd_dev); 140 u64 id = huge_encode_dev(s->s_bdev->bd_dev);
146 lock_kernel(); 141 hpfs_lock(s);
147 142
148 /*if (sbi->sb_n_free == -1) {*/ 143 /*if (sbi->sb_n_free == -1) {*/
149 sbi->sb_n_free = count_bitmaps(s); 144 sbi->sb_n_free = count_bitmaps(s);
@@ -160,7 +155,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf)
160 buf->f_fsid.val[1] = (u32)(id >> 32); 155 buf->f_fsid.val[1] = (u32)(id >> 32);
161 buf->f_namelen = 254; 156 buf->f_namelen = 254;
162 157
163 unlock_kernel(); 158 hpfs_unlock(s);
164 159
165 return 0; 160 return 0;
166} 161}
@@ -406,7 +401,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
406 401
407 *flags |= MS_NOATIME; 402 *flags |= MS_NOATIME;
408 403
409 lock_kernel(); 404 hpfs_lock(s);
410 lock_super(s); 405 lock_super(s);
411 uid = sbi->sb_uid; gid = sbi->sb_gid; 406 uid = sbi->sb_uid; gid = sbi->sb_gid;
412 umask = 0777 & ~sbi->sb_mode; 407 umask = 0777 & ~sbi->sb_mode;
@@ -441,12 +436,12 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
441 replace_mount_options(s, new_opts); 436 replace_mount_options(s, new_opts);
442 437
443 unlock_super(s); 438 unlock_super(s);
444 unlock_kernel(); 439 hpfs_unlock(s);
445 return 0; 440 return 0;
446 441
447out_err: 442out_err:
448 unlock_super(s); 443 unlock_super(s);
449 unlock_kernel(); 444 hpfs_unlock(s);
450 kfree(new_opts); 445 kfree(new_opts);
451 return -EINVAL; 446 return -EINVAL;
452} 447}
@@ -484,13 +479,15 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
484 479
485 int o; 480 int o;
486 481
487 lock_kernel(); 482 if (num_possible_cpus() > 1) {
483 printk(KERN_ERR "HPFS is not SMP safe\n");
484 return -EINVAL;
485 }
488 486
489 save_mount_options(s, options); 487 save_mount_options(s, options);
490 488
491 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 489 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
492 if (!sbi) { 490 if (!sbi) {
493 unlock_kernel();
494 return -ENOMEM; 491 return -ENOMEM;
495 } 492 }
496 s->s_fs_info = sbi; 493 s->s_fs_info = sbi;
@@ -677,7 +674,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
677 root->i_blocks = 5; 674 root->i_blocks = 5;
678 hpfs_brelse4(&qbh); 675 hpfs_brelse4(&qbh);
679 } 676 }
680 unlock_kernel();
681 return 0; 677 return 0;
682 678
683bail4: brelse(bh2); 679bail4: brelse(bh2);
@@ -689,7 +685,6 @@ bail0:
689 kfree(sbi->sb_cp_table); 685 kfree(sbi->sb_cp_table);
690 s->s_fs_info = NULL; 686 s->s_fs_info = NULL;
691 kfree(sbi); 687 kfree(sbi);
692 unlock_kernel();
693 return -EINVAL; 688 return -EINVAL;
694} 689}
695 690
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9885082b470f..b9eeb1cd03ff 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -332,8 +332,7 @@ static void truncate_huge_page(struct page *page)
332{ 332{
333 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 333 cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
334 ClearPageUptodate(page); 334 ClearPageUptodate(page);
335 remove_from_page_cache(page); 335 delete_from_page_cache(page);
336 put_page(page);
337} 336}
338 337
339static void truncate_hugepages(struct inode *inode, loff_t lstart) 338static void truncate_hugepages(struct inode *inode, loff_t lstart)
diff --git a/fs/inode.c b/fs/inode.c
index 0647d80accf6..0b3da4a77704 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -25,6 +25,7 @@
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/ima.h> 27#include <linux/ima.h>
28#include <linux/cred.h>
28 29
29/* 30/*
30 * This is needed for the following functions: 31 * This is needed for the following functions:
@@ -84,16 +85,13 @@ static struct hlist_head *inode_hashtable __read_mostly;
84DEFINE_SPINLOCK(inode_lock); 85DEFINE_SPINLOCK(inode_lock);
85 86
86/* 87/*
87 * iprune_sem provides exclusion between the kswapd or try_to_free_pages 88 * iprune_sem provides exclusion between the icache shrinking and the
88 * icache shrinking path, and the umount path. Without this exclusion, 89 * umount path.
89 * by the time prune_icache calls iput for the inode whose pages it has
90 * been invalidating, or by the time it calls clear_inode & destroy_inode
91 * from its final dispose_list, the struct super_block they refer to
92 * (for inode->i_sb->s_op) may already have been freed and reused.
93 * 90 *
94 * We make this an rwsem because the fastpath is icache shrinking. In 91 * We don't actually need it to protect anything in the umount path,
95 * some cases a filesystem may be doing a significant amount of work in 92 * but only need to cycle through it to make sure any inode that
96 * its inode reclaim code, so this should improve parallelism. 93 * prune_icache took off the LRU list has been fully torn down by the
94 * time we are past evict_inodes.
97 */ 95 */
98static DECLARE_RWSEM(iprune_sem); 96static DECLARE_RWSEM(iprune_sem);
99 97
@@ -516,17 +514,12 @@ void evict_inodes(struct super_block *sb)
516 struct inode *inode, *next; 514 struct inode *inode, *next;
517 LIST_HEAD(dispose); 515 LIST_HEAD(dispose);
518 516
519 down_write(&iprune_sem);
520
521 spin_lock(&inode_lock); 517 spin_lock(&inode_lock);
522 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 518 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
523 if (atomic_read(&inode->i_count)) 519 if (atomic_read(&inode->i_count))
524 continue; 520 continue;
525 521 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
526 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
527 WARN_ON(1);
528 continue; 522 continue;
529 }
530 523
531 inode->i_state |= I_FREEING; 524 inode->i_state |= I_FREEING;
532 525
@@ -542,6 +535,13 @@ void evict_inodes(struct super_block *sb)
542 spin_unlock(&inode_lock); 535 spin_unlock(&inode_lock);
543 536
544 dispose_list(&dispose); 537 dispose_list(&dispose);
538
539 /*
540 * Cycle through iprune_sem to make sure any inode that prune_icache
541 * moved off the list before we took the lock has been fully torn
542 * down.
543 */
544 down_write(&iprune_sem);
545 up_write(&iprune_sem); 545 up_write(&iprune_sem);
546} 546}
547 547
@@ -561,8 +561,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
561 struct inode *inode, *next; 561 struct inode *inode, *next;
562 LIST_HEAD(dispose); 562 LIST_HEAD(dispose);
563 563
564 down_write(&iprune_sem);
565
566 spin_lock(&inode_lock); 564 spin_lock(&inode_lock);
567 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { 565 list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
568 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) 566 if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
@@ -590,7 +588,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
590 spin_unlock(&inode_lock); 588 spin_unlock(&inode_lock);
591 589
592 dispose_list(&dispose); 590 dispose_list(&dispose);
593 up_write(&iprune_sem);
594 591
595 return busy; 592 return busy;
596} 593}
@@ -1719,7 +1716,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1719EXPORT_SYMBOL(init_special_inode); 1716EXPORT_SYMBOL(init_special_inode);
1720 1717
1721/** 1718/**
1722 * Init uid,gid,mode for new inode according to posix standards 1719 * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
1723 * @inode: New inode 1720 * @inode: New inode
1724 * @dir: Directory inode 1721 * @dir: Directory inode
1725 * @mode: mode of the new inode 1722 * @mode: mode of the new inode
@@ -1737,3 +1734,22 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
1737 inode->i_mode = mode; 1734 inode->i_mode = mode;
1738} 1735}
1739EXPORT_SYMBOL(inode_init_owner); 1736EXPORT_SYMBOL(inode_init_owner);
1737
1738/**
1739 * inode_owner_or_capable - check current task permissions to inode
1740 * @inode: inode being checked
1741 *
1742 * Return true if current either has CAP_FOWNER to the inode, or
1743 * owns the file.
1744 */
1745bool inode_owner_or_capable(const struct inode *inode)
1746{
1747 struct user_namespace *ns = inode_userns(inode);
1748
1749 if (current_user_ns() == ns && current_fsuid() == inode->i_uid)
1750 return true;
1751 if (ns_capable(ns, CAP_FOWNER))
1752 return true;
1753 return false;
1754}
1755EXPORT_SYMBOL(inode_owner_or_capable);
diff --git a/fs/internal.h b/fs/internal.h
index 9b976b57d7fe..8318059b42c6 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -12,6 +12,7 @@
12#include <linux/lglock.h> 12#include <linux/lglock.h>
13 13
14struct super_block; 14struct super_block;
15struct file_system_type;
15struct linux_binprm; 16struct linux_binprm;
16struct path; 17struct path;
17 18
@@ -61,10 +62,9 @@ extern int check_unsafe_exec(struct linux_binprm *);
61extern int copy_mount_options(const void __user *, unsigned long *); 62extern int copy_mount_options(const void __user *, unsigned long *);
62extern int copy_mount_string(const void __user *, char **); 63extern int copy_mount_string(const void __user *, char **);
63 64
64extern void free_vfsmnt(struct vfsmount *);
65extern struct vfsmount *alloc_vfsmnt(const char *);
66extern unsigned int mnt_get_count(struct vfsmount *mnt); 65extern unsigned int mnt_get_count(struct vfsmount *mnt);
67extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int); 66extern struct vfsmount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
67extern struct vfsmount *lookup_mnt(struct path *);
68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, 68extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *,
69 struct vfsmount *); 69 struct vfsmount *);
70extern void release_mounts(struct list_head *); 70extern void release_mounts(struct list_head *);
@@ -99,6 +99,8 @@ extern struct file *get_empty_filp(void);
99extern int do_remount_sb(struct super_block *, int, void *, int); 99extern int do_remount_sb(struct super_block *, int, void *, int);
100extern void __put_super(struct super_block *sb); 100extern void __put_super(struct super_block *sb);
101extern void put_super(struct super_block *sb); 101extern void put_super(struct super_block *sb);
102extern struct dentry *mount_fs(struct file_system_type *,
103 int, const char *, void *);
102 104
103/* 105/*
104 * open.c 106 * open.c
@@ -106,6 +108,19 @@ extern void put_super(struct super_block *sb);
106struct nameidata; 108struct nameidata;
107extern struct file *nameidata_to_filp(struct nameidata *); 109extern struct file *nameidata_to_filp(struct nameidata *);
108extern void release_open_intent(struct nameidata *); 110extern void release_open_intent(struct nameidata *);
111struct open_flags {
112 int open_flag;
113 int mode;
114 int acc_mode;
115 int intent;
116};
117extern struct file *do_filp_open(int dfd, const char *pathname,
118 const struct open_flags *op, int lookup_flags);
119extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
120 const char *, const struct open_flags *, int lookup_flags);
121
122extern long do_handle_open(int mountdirfd,
123 struct file_handle __user *ufh, int open_flag);
109 124
110/* 125/*
111 * inode.c 126 * inode.c
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1eebeb72b202..1d9b9fcb2db4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -548,6 +548,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
548{ 548{
549 int error = 0; 549 int error = 0;
550 int __user *argp = (int __user *)arg; 550 int __user *argp = (int __user *)arg;
551 struct inode *inode = filp->f_path.dentry->d_inode;
551 552
552 switch (cmd) { 553 switch (cmd) {
553 case FIOCLEX: 554 case FIOCLEX:
@@ -567,13 +568,11 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
567 break; 568 break;
568 569
569 case FIOQSIZE: 570 case FIOQSIZE:
570 if (S_ISDIR(filp->f_path.dentry->d_inode->i_mode) || 571 if (S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
571 S_ISREG(filp->f_path.dentry->d_inode->i_mode) || 572 S_ISLNK(inode->i_mode)) {
572 S_ISLNK(filp->f_path.dentry->d_inode->i_mode)) { 573 loff_t res = inode_get_bytes(inode);
573 loff_t res = 574 error = copy_to_user(argp, &res, sizeof(res)) ?
574 inode_get_bytes(filp->f_path.dentry->d_inode); 575 -EFAULT : 0;
575 error = copy_to_user((loff_t __user *)arg, &res,
576 sizeof(res)) ? -EFAULT : 0;
577 } else 576 } else
578 error = -ENOTTY; 577 error = -ENOTTY;
579 break; 578 break;
@@ -590,14 +589,10 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
590 return ioctl_fiemap(filp, arg); 589 return ioctl_fiemap(filp, arg);
591 590
592 case FIGETBSZ: 591 case FIGETBSZ:
593 { 592 return put_user(inode->i_sb->s_blocksize, argp);
594 struct inode *inode = filp->f_path.dentry->d_inode;
595 int __user *p = (int __user *)arg;
596 return put_user(inode->i_sb->s_blocksize, p);
597 }
598 593
599 default: 594 default:
600 if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) 595 if (S_ISREG(inode->i_mode))
601 error = file_ioctl(filp, cmd, arg); 596 error = file_ioctl(filp, cmd, arg);
602 else 597 else
603 error = vfs_ioctl(filp, cmd, arg); 598 error = vfs_ioctl(filp, cmd, arg);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index ed752cb38474..dd4687ff30d0 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry,
124 * offset of the inode and the upper 16 bits of fh32[1] to 124 * offset of the inode and the upper 16 bits of fh32[1] to
125 * hold the offset of the parent. 125 * hold the offset of the parent.
126 */ 126 */
127 127 if (connectable && (len < 5)) {
128 if (len < 3 || (connectable && len < 5)) 128 *max_len = 5;
129 return 255;
130 } else if (len < 3) {
131 *max_len = 3;
129 return 255; 132 return 255;
133 }
130 134
131 len = 3; 135 len = 3;
132 fh32[0] = ei->i_iget5_block; 136 fh32[0] = ei->i_iget5_block;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index da1b5e4ffce1..eb11601f2e00 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -839,7 +839,7 @@ journal_t * journal_init_inode (struct inode *inode)
839 err = journal_bmap(journal, 0, &blocknr); 839 err = journal_bmap(journal, 0, &blocknr);
840 /* If that failed, give up */ 840 /* If that failed, give up */
841 if (err) { 841 if (err) {
842 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 842 printk(KERN_ERR "%s: Cannot locate journal superblock\n",
843 __func__); 843 __func__);
844 goto out_err; 844 goto out_err;
845 } 845 }
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 97e73469b2c4..90407b8fece7 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -991,7 +991,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
991 err = jbd2_journal_bmap(journal, 0, &blocknr); 991 err = jbd2_journal_bmap(journal, 0, &blocknr);
992 /* If that failed, give up */ 992 /* If that failed, give up */
993 if (err) { 993 if (err) {
994 printk(KERN_ERR "%s: Cannnot locate journal superblock\n", 994 printk(KERN_ERR "%s: Cannot locate journal superblock\n",
995 __func__); 995 __func__);
996 goto out_err; 996 goto out_err;
997 } 997 }
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 95b79672150a..828a0e1ea438 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -402,7 +402,7 @@ static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
402 402
403 if (name[0] != '\0') 403 if (name[0] != '\0')
404 return -EINVAL; 404 return -EINVAL;
405 if (!is_owner_or_cap(dentry->d_inode)) 405 if (!inode_owner_or_capable(dentry->d_inode))
406 return -EPERM; 406 return -EPERM;
407 407
408 if (value) { 408 if (value) {
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index fd05a0b9431d..5a001020c542 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -40,12 +40,13 @@ static z_stream inf_strm, def_strm;
40 40
41static int __init alloc_workspaces(void) 41static int __init alloc_workspaces(void)
42{ 42{
43 def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 43 def_strm.workspace = vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
44 MAX_MEM_LEVEL));
44 if (!def_strm.workspace) { 45 if (!def_strm.workspace) {
45 printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize()); 46 printk(KERN_WARNING "Failed to allocate %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL));
46 return -ENOMEM; 47 return -ENOMEM;
47 } 48 }
48 D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize())); 49 D1(printk(KERN_DEBUG "Allocated %d bytes for deflate workspace\n", zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL)));
49 inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 50 inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
50 if (!inf_strm.workspace) { 51 if (!inf_strm.workspace) {
51 printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize()); 52 printk(KERN_WARNING "Failed to allocate %d bytes for inflate workspace\n", zlib_inflate_workspacesize());
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 92978658ed18..82faddd1f321 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -215,8 +215,7 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
215 no chance of AB-BA deadlock involving its f->sem). */ 215 no chance of AB-BA deadlock involving its f->sem). */
216 mutex_unlock(&f->sem); 216 mutex_unlock(&f->sem);
217 217
218 ret = jffs2_do_create(c, dir_f, f, ri, 218 ret = jffs2_do_create(c, dir_f, f, ri, &dentry->d_name);
219 dentry->d_name.name, dentry->d_name.len);
220 if (ret) 219 if (ret)
221 goto fail; 220 goto fail;
222 221
@@ -386,7 +385,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
386 385
387 jffs2_complete_reservation(c); 386 jffs2_complete_reservation(c);
388 387
389 ret = jffs2_init_security(inode, dir_i); 388 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
390 if (ret) 389 if (ret)
391 goto fail; 390 goto fail;
392 391
@@ -530,7 +529,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
530 529
531 jffs2_complete_reservation(c); 530 jffs2_complete_reservation(c);
532 531
533 ret = jffs2_init_security(inode, dir_i); 532 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
534 if (ret) 533 if (ret)
535 goto fail; 534 goto fail;
536 535
@@ -703,7 +702,7 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, int mode, de
703 702
704 jffs2_complete_reservation(c); 703 jffs2_complete_reservation(c);
705 704
706 ret = jffs2_init_security(inode, dir_i); 705 ret = jffs2_init_security(inode, dir_i, &dentry->d_name);
707 if (ret) 706 if (ret)
708 goto fail; 707 goto fail;
709 708
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 5a53d9bdb2b5..e4619b00f7c5 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -401,7 +401,7 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
401 struct jffs2_raw_inode *ri, unsigned char *buf, 401 struct jffs2_raw_inode *ri, unsigned char *buf,
402 uint32_t offset, uint32_t writelen, uint32_t *retlen); 402 uint32_t offset, uint32_t writelen, uint32_t *retlen);
403int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, 403int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f,
404 struct jffs2_raw_inode *ri, const char *name, int namelen); 404 struct jffs2_raw_inode *ri, const struct qstr *qstr);
405int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name, 405int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, const char *name,
406 int namelen, struct jffs2_inode_info *dead_f, uint32_t time); 406 int namelen, struct jffs2_inode_info *dead_f, uint32_t time);
407int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino, 407int jffs2_do_link(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, uint32_t ino,
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 239f51216a68..cfeb7164b085 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -23,14 +23,15 @@
23#include "nodelist.h" 23#include "nodelist.h"
24 24
25/* ---- Initial Security Label Attachment -------------- */ 25/* ---- Initial Security Label Attachment -------------- */
26int jffs2_init_security(struct inode *inode, struct inode *dir) 26int jffs2_init_security(struct inode *inode, struct inode *dir,
27 const struct qstr *qstr)
27{ 28{
28 int rc; 29 int rc;
29 size_t len; 30 size_t len;
30 void *value; 31 void *value;
31 char *name; 32 char *name;
32 33
33 rc = security_inode_init_security(inode, dir, &name, &value, &len); 34 rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
34 if (rc) { 35 if (rc) {
35 if (rc == -EOPNOTSUPP) 36 if (rc == -EOPNOTSUPP)
36 return 0; 37 return 0;
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index c819eb0e982d..30d175b6d290 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -424,7 +424,9 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
424 return ret; 424 return ret;
425} 425}
426 426
427int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, struct jffs2_inode_info *f, struct jffs2_raw_inode *ri, const char *name, int namelen) 427int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
428 struct jffs2_inode_info *f, struct jffs2_raw_inode *ri,
429 const struct qstr *qstr)
428{ 430{
429 struct jffs2_raw_dirent *rd; 431 struct jffs2_raw_dirent *rd;
430 struct jffs2_full_dnode *fn; 432 struct jffs2_full_dnode *fn;
@@ -466,15 +468,15 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
466 mutex_unlock(&f->sem); 468 mutex_unlock(&f->sem);
467 jffs2_complete_reservation(c); 469 jffs2_complete_reservation(c);
468 470
469 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode); 471 ret = jffs2_init_security(&f->vfs_inode, &dir_f->vfs_inode, qstr);
470 if (ret) 472 if (ret)
471 return ret; 473 return ret;
472 ret = jffs2_init_acl_post(&f->vfs_inode); 474 ret = jffs2_init_acl_post(&f->vfs_inode);
473 if (ret) 475 if (ret)
474 return ret; 476 return ret;
475 477
476 ret = jffs2_reserve_space(c, sizeof(*rd)+namelen, &alloclen, 478 ret = jffs2_reserve_space(c, sizeof(*rd)+qstr->len, &alloclen,
477 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(namelen)); 479 ALLOC_NORMAL, JFFS2_SUMMARY_DIRENT_SIZE(qstr->len));
478 480
479 if (ret) { 481 if (ret) {
480 /* Eep. */ 482 /* Eep. */
@@ -493,19 +495,19 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
493 495
494 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK); 496 rd->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
495 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT); 497 rd->nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
496 rd->totlen = cpu_to_je32(sizeof(*rd) + namelen); 498 rd->totlen = cpu_to_je32(sizeof(*rd) + qstr->len);
497 rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4)); 499 rd->hdr_crc = cpu_to_je32(crc32(0, rd, sizeof(struct jffs2_unknown_node)-4));
498 500
499 rd->pino = cpu_to_je32(dir_f->inocache->ino); 501 rd->pino = cpu_to_je32(dir_f->inocache->ino);
500 rd->version = cpu_to_je32(++dir_f->highest_version); 502 rd->version = cpu_to_je32(++dir_f->highest_version);
501 rd->ino = ri->ino; 503 rd->ino = ri->ino;
502 rd->mctime = ri->ctime; 504 rd->mctime = ri->ctime;
503 rd->nsize = namelen; 505 rd->nsize = qstr->len;
504 rd->type = DT_REG; 506 rd->type = DT_REG;
505 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8)); 507 rd->node_crc = cpu_to_je32(crc32(0, rd, sizeof(*rd)-8));
506 rd->name_crc = cpu_to_je32(crc32(0, name, namelen)); 508 rd->name_crc = cpu_to_je32(crc32(0, qstr->name, qstr->len));
507 509
508 fd = jffs2_write_dirent(c, dir_f, rd, name, namelen, ALLOC_NORMAL); 510 fd = jffs2_write_dirent(c, dir_f, rd, qstr->name, qstr->len, ALLOC_NORMAL);
509 511
510 jffs2_free_raw_dirent(rd); 512 jffs2_free_raw_dirent(rd);
511 513
diff --git a/fs/jffs2/xattr.h b/fs/jffs2/xattr.h
index cf4f5759b42b..7be4beb306f3 100644
--- a/fs/jffs2/xattr.h
+++ b/fs/jffs2/xattr.h
@@ -121,10 +121,11 @@ extern ssize_t jffs2_listxattr(struct dentry *, char *, size_t);
121#endif /* CONFIG_JFFS2_FS_XATTR */ 121#endif /* CONFIG_JFFS2_FS_XATTR */
122 122
123#ifdef CONFIG_JFFS2_FS_SECURITY 123#ifdef CONFIG_JFFS2_FS_SECURITY
124extern int jffs2_init_security(struct inode *inode, struct inode *dir); 124extern int jffs2_init_security(struct inode *inode, struct inode *dir,
125 const struct qstr *qstr);
125extern const struct xattr_handler jffs2_security_xattr_handler; 126extern const struct xattr_handler jffs2_security_xattr_handler;
126#else 127#else
127#define jffs2_init_security(inode,dir) (0) 128#define jffs2_init_security(inode,dir,qstr) (0)
128#endif /* CONFIG_JFFS2_FS_SECURITY */ 129#endif /* CONFIG_JFFS2_FS_SECURITY */
129 130
130#endif /* _JFFS2_FS_XATTR_H_ */ 131#endif /* _JFFS2_FS_XATTR_H_ */
diff --git a/fs/jfs/Makefile b/fs/jfs/Makefile
index 3adb6395e42d..a58fa72d7e59 100644
--- a/fs/jfs/Makefile
+++ b/fs/jfs/Makefile
@@ -13,4 +13,4 @@ jfs-y := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
13 13
14jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o 14jfs-$(CONFIG_JFS_POSIX_ACL) += acl.o
15 15
16EXTRA_CFLAGS += -D_JFS_4K 16ccflags-y := -D_JFS_4K
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index afe222bf300f..6f98a1866776 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -72,7 +72,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
72 if (err) 72 if (err)
73 return err; 73 return err;
74 74
75 if (!is_owner_or_cap(inode)) { 75 if (!inode_owner_or_capable(inode)) {
76 err = -EACCES; 76 err = -EACCES;
77 goto setflags_out; 77 goto setflags_out;
78 } 78 }
diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h
index 88b6cc535bf2..e9e100fd7c09 100644
--- a/fs/jfs/jfs_xattr.h
+++ b/fs/jfs/jfs_xattr.h
@@ -62,10 +62,11 @@ extern ssize_t jfs_listxattr(struct dentry *, char *, size_t);
62extern int jfs_removexattr(struct dentry *, const char *); 62extern int jfs_removexattr(struct dentry *, const char *);
63 63
64#ifdef CONFIG_JFS_SECURITY 64#ifdef CONFIG_JFS_SECURITY
65extern int jfs_init_security(tid_t, struct inode *, struct inode *); 65extern int jfs_init_security(tid_t, struct inode *, struct inode *,
66 const struct qstr *);
66#else 67#else
67static inline int jfs_init_security(tid_t tid, struct inode *inode, 68static inline int jfs_init_security(tid_t tid, struct inode *inode,
68 struct inode *dir) 69 struct inode *dir, const struct qstr *qstr)
69{ 70{
70 return 0; 71 return 0;
71} 72}
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 81ead850ddb6..eaaf2b511e89 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -115,7 +115,7 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
115 if (rc) 115 if (rc)
116 goto out3; 116 goto out3;
117 117
118 rc = jfs_init_security(tid, ip, dip); 118 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
119 if (rc) { 119 if (rc) {
120 txAbort(tid, 0); 120 txAbort(tid, 0);
121 goto out3; 121 goto out3;
@@ -253,7 +253,7 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
253 if (rc) 253 if (rc)
254 goto out3; 254 goto out3;
255 255
256 rc = jfs_init_security(tid, ip, dip); 256 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
257 if (rc) { 257 if (rc) {
258 txAbort(tid, 0); 258 txAbort(tid, 0);
259 goto out3; 259 goto out3;
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry,
809 if (ip->i_nlink == JFS_LINK_MAX) 809 if (ip->i_nlink == JFS_LINK_MAX)
810 return -EMLINK; 810 return -EMLINK;
811 811
812 if (ip->i_nlink == 0)
813 return -ENOENT;
814
815 dquot_initialize(dir); 812 dquot_initialize(dir);
816 813
817 tid = txBegin(ip->i_sb, 0); 814 tid = txBegin(ip->i_sb, 0);
@@ -932,7 +929,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
932 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT); 929 mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
933 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD); 930 mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
934 931
935 rc = jfs_init_security(tid, ip, dip); 932 rc = jfs_init_security(tid, ip, dip, &dentry->d_name);
936 if (rc) 933 if (rc)
937 goto out3; 934 goto out3;
938 935
@@ -1395,7 +1392,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
1395 if (rc) 1392 if (rc)
1396 goto out3; 1393 goto out3;
1397 1394
1398 rc = jfs_init_security(tid, ip, dir); 1395 rc = jfs_init_security(tid, ip, dir, &dentry->d_name);
1399 if (rc) { 1396 if (rc) {
1400 txAbort(tid, 0); 1397 txAbort(tid, 0);
1401 goto out3; 1398 goto out3;
@@ -1600,7 +1597,7 @@ out:
1600 1597
1601static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1598static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1602{ 1599{
1603 if (nd->flags & LOOKUP_RCU) 1600 if (nd && nd->flags & LOOKUP_RCU)
1604 return -ECHILD; 1601 return -ECHILD;
1605 /* 1602 /*
1606 * This is not negative dentry. Always valid. 1603 * This is not negative dentry. Always valid.
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 2d7f165d0f1d..24838f1eeee5 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -678,7 +678,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
678 struct posix_acl *acl; 678 struct posix_acl *acl;
679 int rc; 679 int rc;
680 680
681 if (!is_owner_or_cap(inode)) 681 if (!inode_owner_or_capable(inode))
682 return -EPERM; 682 return -EPERM;
683 683
684 /* 684 /*
@@ -1091,7 +1091,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
1091} 1091}
1092 1092
1093#ifdef CONFIG_JFS_SECURITY 1093#ifdef CONFIG_JFS_SECURITY
1094int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir) 1094int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir,
1095 const struct qstr *qstr)
1095{ 1096{
1096 int rc; 1097 int rc;
1097 size_t len; 1098 size_t len;
@@ -1099,7 +1100,8 @@ int jfs_init_security(tid_t tid, struct inode *inode, struct inode *dir)
1099 char *suffix; 1100 char *suffix;
1100 char *name; 1101 char *name;
1101 1102
1102 rc = security_inode_init_security(inode, dir, &suffix, &value, &len); 1103 rc = security_inode_init_security(inode, dir, qstr, &suffix, &value,
1104 &len);
1103 if (rc) { 1105 if (rc) {
1104 if (rc == -EOPNOTSUPP) 1106 if (rc == -EOPNOTSUPP)
1105 return 0; 1107 return 0;
diff --git a/fs/locks.c b/fs/locks.c
index 2c2d3b804d62..0a4f50dfadfb 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -145,7 +145,6 @@ static DEFINE_SPINLOCK(file_lock_lock);
145 145
146/* 146/*
147 * Protects the two list heads above, plus the inode->i_flock list 147 * Protects the two list heads above, plus the inode->i_flock list
148 * FIXME: should use a spinlock, once lockd and ceph are ready.
149 */ 148 */
150void lock_flocks(void) 149void lock_flocks(void)
151{ 150{
diff --git a/fs/logfs/compr.c b/fs/logfs/compr.c
index 44bbfd249abc..961f02b86d97 100644
--- a/fs/logfs/compr.c
+++ b/fs/logfs/compr.c
@@ -81,7 +81,7 @@ error:
81 81
82int __init logfs_compr_init(void) 82int __init logfs_compr_init(void)
83{ 83{
84 size_t size = max(zlib_deflate_workspacesize(), 84 size_t size = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
85 zlib_inflate_workspacesize()); 85 zlib_inflate_workspacesize());
86 stream.workspace = vmalloc(size); 86 stream.workspace = vmalloc(size);
87 if (!stream.workspace) 87 if (!stream.workspace)
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index e86376b87af1..c2ad7028def4 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -196,7 +196,7 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
196 if (IS_RDONLY(inode)) 196 if (IS_RDONLY(inode))
197 return -EROFS; 197 return -EROFS;
198 198
199 if (!is_owner_or_cap(inode)) 199 if (!inode_owner_or_capable(inode))
200 return -EACCES; 200 return -EACCES;
201 201
202 err = get_user(flags, (int __user *)arg); 202 err = get_user(flags, (int __user *)arg);
diff --git a/fs/minix/Kconfig b/fs/minix/Kconfig
index 0fd7ca994264..6624684dd5de 100644
--- a/fs/minix/Kconfig
+++ b/fs/minix/Kconfig
@@ -15,3 +15,11 @@ config MINIX_FS
15 module will be called minix. Note that the file system of your root 15 module will be called minix. Note that the file system of your root
16 partition (the one containing the directory /) cannot be compiled as 16 partition (the one containing the directory /) cannot be compiled as
17 a module. 17 a module.
18
19config MINIX_FS_NATIVE_ENDIAN
20 def_bool MINIX_FS
21 depends on H8300 || M32R || MICROBLAZE || MIPS || S390 || SUPERH || SPARC || XTENSA || (M68K && !MMU)
22
23config MINIX_FS_BIG_ENDIAN_16BIT_INDEXED
24 def_bool MINIX_FS
25 depends on M68K && MMU
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 407b1c84911e..341e2122879a 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -88,4 +88,78 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
88 return list_entry(inode, struct minix_inode_info, vfs_inode); 88 return list_entry(inode, struct minix_inode_info, vfs_inode);
89} 89}
90 90
91#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \
92 defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
93
94#error Minix file system byte order broken
95
96#elif defined(CONFIG_MINIX_FS_NATIVE_ENDIAN)
97
98/*
99 * big-endian 32 or 64 bit indexed bitmaps on big-endian system or
100 * little-endian bitmaps on little-endian system
101 */
102
103#define minix_test_and_set_bit(nr, addr) \
104 __test_and_set_bit((nr), (unsigned long *)(addr))
105#define minix_set_bit(nr, addr) \
106 __set_bit((nr), (unsigned long *)(addr))
107#define minix_test_and_clear_bit(nr, addr) \
108 __test_and_clear_bit((nr), (unsigned long *)(addr))
109#define minix_test_bit(nr, addr) \
110 test_bit((nr), (unsigned long *)(addr))
111#define minix_find_first_zero_bit(addr, size) \
112 find_first_zero_bit((unsigned long *)(addr), (size))
113
114#elif defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
115
116/*
117 * big-endian 16bit indexed bitmaps
118 */
119
120static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size)
121{
122 const unsigned short *p = vaddr, *addr = vaddr;
123 unsigned short num;
124
125 if (!size)
126 return 0;
127
128 size = (size >> 4) + ((size & 15) > 0);
129 while (*p++ == 0xffff) {
130 if (--size == 0)
131 return (p - addr) << 4;
132 }
133
134 num = *--p;
135 return ((p - addr) << 4) + ffz(num);
136}
137
138#define minix_test_and_set_bit(nr, addr) \
139 __test_and_set_bit((nr) ^ 16, (unsigned long *)(addr))
140#define minix_set_bit(nr, addr) \
141 __set_bit((nr) ^ 16, (unsigned long *)(addr))
142#define minix_test_and_clear_bit(nr, addr) \
143 __test_and_clear_bit((nr) ^ 16, (unsigned long *)(addr))
144
145static inline int minix_test_bit(int nr, const void *vaddr)
146{
147 const unsigned short *p = vaddr;
148 return (p[nr >> 4] & (1U << (nr & 15))) != 0;
149}
150
151#else
152
153/*
154 * little-endian bitmaps
155 */
156
157#define minix_test_and_set_bit __test_and_set_bit_le
158#define minix_set_bit __set_bit_le
159#define minix_test_and_clear_bit __test_and_clear_bit_le
160#define minix_test_bit test_bit_le
161#define minix_find_first_zero_bit find_first_zero_bit_le
162
163#endif
164
91#endif /* FS_MINIX_H */ 165#endif /* FS_MINIX_H */
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index ce7337ddfdbf..6e6777f1b4b2 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -213,7 +213,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
213 new_de = minix_find_entry(new_dentry, &new_page); 213 new_de = minix_find_entry(new_dentry, &new_page);
214 if (!new_de) 214 if (!new_de)
215 goto out_dir; 215 goto out_dir;
216 inode_inc_link_count(old_inode);
217 minix_set_link(new_de, new_page, old_inode); 216 minix_set_link(new_de, new_page, old_inode);
218 new_inode->i_ctime = CURRENT_TIME_SEC; 217 new_inode->i_ctime = CURRENT_TIME_SEC;
219 if (dir_de) 218 if (dir_de)
@@ -225,18 +224,15 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
225 if (new_dir->i_nlink >= info->s_link_max) 224 if (new_dir->i_nlink >= info->s_link_max)
226 goto out_dir; 225 goto out_dir;
227 } 226 }
228 inode_inc_link_count(old_inode);
229 err = minix_add_link(new_dentry, old_inode); 227 err = minix_add_link(new_dentry, old_inode);
230 if (err) { 228 if (err)
231 inode_dec_link_count(old_inode);
232 goto out_dir; 229 goto out_dir;
233 }
234 if (dir_de) 230 if (dir_de)
235 inode_inc_link_count(new_dir); 231 inode_inc_link_count(new_dir);
236 } 232 }
237 233
238 minix_delete_entry(old_de, old_page); 234 minix_delete_entry(old_de, old_page);
239 inode_dec_link_count(old_inode); 235 mark_inode_dirty(old_inode);
240 236
241 if (dir_de) { 237 if (dir_de) {
242 minix_set_link(dir_de, dir_page, new_dir); 238 minix_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/namei.c b/fs/namei.c
index 0087cf9c2c6b..d0066e17d45d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page)
136 return retval; 136 return retval;
137} 137}
138 138
139char * getname(const char __user * filename) 139static char *getname_flags(const char __user * filename, int flags)
140{ 140{
141 char *tmp, *result; 141 char *tmp, *result;
142 142
@@ -147,14 +147,21 @@ char * getname(const char __user * filename)
147 147
148 result = tmp; 148 result = tmp;
149 if (retval < 0) { 149 if (retval < 0) {
150 __putname(tmp); 150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 result = ERR_PTR(retval); 151 __putname(tmp);
152 result = ERR_PTR(retval);
153 }
152 } 154 }
153 } 155 }
154 audit_getname(result); 156 audit_getname(result);
155 return result; 157 return result;
156} 158}
157 159
160char *getname(const char __user * filename)
161{
162 return getname_flags(filename, 0);
163}
164
158#ifdef CONFIG_AUDITSYSCALL 165#ifdef CONFIG_AUDITSYSCALL
159void putname(const char *name) 166void putname(const char *name)
160{ 167{
@@ -176,6 +183,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
176 183
177 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
178 185
186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms;
188
179 if (current_fsuid() == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
180 mode >>= 6; 190 mode >>= 6;
181 else { 191 else {
@@ -189,6 +199,7 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
189 mode >>= 3; 199 mode >>= 3;
190 } 200 }
191 201
202other_perms:
192 /* 203 /*
193 * If the DACs are ok we don't need any capability check. 204 * If the DACs are ok we don't need any capability check.
194 */ 205 */
@@ -230,7 +241,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
230 * Executable DACs are overridable if at least one exec bit is set. 241 * Executable DACs are overridable if at least one exec bit is set.
231 */ 242 */
232 if (!(mask & MAY_EXEC) || execute_ok(inode)) 243 if (!(mask & MAY_EXEC) || execute_ok(inode))
233 if (capable(CAP_DAC_OVERRIDE)) 244 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
234 return 0; 245 return 0;
235 246
236 /* 247 /*
@@ -238,7 +249,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
238 */ 249 */
239 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 250 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
240 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 251 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
241 if (capable(CAP_DAC_READ_SEARCH)) 252 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
242 return 0; 253 return 0;
243 254
244 return -EACCES; 255 return -EACCES;
@@ -401,9 +412,11 @@ static int nameidata_drop_rcu(struct nameidata *nd)
401{ 412{
402 struct fs_struct *fs = current->fs; 413 struct fs_struct *fs = current->fs;
403 struct dentry *dentry = nd->path.dentry; 414 struct dentry *dentry = nd->path.dentry;
415 int want_root = 0;
404 416
405 BUG_ON(!(nd->flags & LOOKUP_RCU)); 417 BUG_ON(!(nd->flags & LOOKUP_RCU));
406 if (nd->root.mnt) { 418 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
419 want_root = 1;
407 spin_lock(&fs->lock); 420 spin_lock(&fs->lock);
408 if (nd->root.mnt != fs->root.mnt || 421 if (nd->root.mnt != fs->root.mnt ||
409 nd->root.dentry != fs->root.dentry) 422 nd->root.dentry != fs->root.dentry)
@@ -414,7 +427,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
414 goto err; 427 goto err;
415 BUG_ON(nd->inode != dentry->d_inode); 428 BUG_ON(nd->inode != dentry->d_inode);
416 spin_unlock(&dentry->d_lock); 429 spin_unlock(&dentry->d_lock);
417 if (nd->root.mnt) { 430 if (want_root) {
418 path_get(&nd->root); 431 path_get(&nd->root);
419 spin_unlock(&fs->lock); 432 spin_unlock(&fs->lock);
420 } 433 }
@@ -427,7 +440,7 @@ static int nameidata_drop_rcu(struct nameidata *nd)
427err: 440err:
428 spin_unlock(&dentry->d_lock); 441 spin_unlock(&dentry->d_lock);
429err_root: 442err_root:
430 if (nd->root.mnt) 443 if (want_root)
431 spin_unlock(&fs->lock); 444 spin_unlock(&fs->lock);
432 return -ECHILD; 445 return -ECHILD;
433} 446}
@@ -454,9 +467,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
454{ 467{
455 struct fs_struct *fs = current->fs; 468 struct fs_struct *fs = current->fs;
456 struct dentry *parent = nd->path.dentry; 469 struct dentry *parent = nd->path.dentry;
470 int want_root = 0;
457 471
458 BUG_ON(!(nd->flags & LOOKUP_RCU)); 472 BUG_ON(!(nd->flags & LOOKUP_RCU));
459 if (nd->root.mnt) { 473 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
474 want_root = 1;
460 spin_lock(&fs->lock); 475 spin_lock(&fs->lock);
461 if (nd->root.mnt != fs->root.mnt || 476 if (nd->root.mnt != fs->root.mnt ||
462 nd->root.dentry != fs->root.dentry) 477 nd->root.dentry != fs->root.dentry)
@@ -476,7 +491,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
476 parent->d_count++; 491 parent->d_count++;
477 spin_unlock(&dentry->d_lock); 492 spin_unlock(&dentry->d_lock);
478 spin_unlock(&parent->d_lock); 493 spin_unlock(&parent->d_lock);
479 if (nd->root.mnt) { 494 if (want_root) {
480 path_get(&nd->root); 495 path_get(&nd->root);
481 spin_unlock(&fs->lock); 496 spin_unlock(&fs->lock);
482 } 497 }
@@ -490,7 +505,7 @@ err:
490 spin_unlock(&dentry->d_lock); 505 spin_unlock(&dentry->d_lock);
491 spin_unlock(&parent->d_lock); 506 spin_unlock(&parent->d_lock);
492err_root: 507err_root:
493 if (nd->root.mnt) 508 if (want_root)
494 spin_unlock(&fs->lock); 509 spin_unlock(&fs->lock);
495 return -ECHILD; 510 return -ECHILD;
496} 511}
@@ -498,8 +513,16 @@ err_root:
498/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ 513/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
499static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) 514static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry)
500{ 515{
501 if (nd->flags & LOOKUP_RCU) 516 if (nd->flags & LOOKUP_RCU) {
502 return nameidata_dentry_drop_rcu(nd, dentry); 517 if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) {
518 nd->flags &= ~LOOKUP_RCU;
519 if (!(nd->flags & LOOKUP_ROOT))
520 nd->root.mnt = NULL;
521 rcu_read_unlock();
522 br_read_unlock(vfsmount_lock);
523 return -ECHILD;
524 }
525 }
503 return 0; 526 return 0;
504} 527}
505 528
@@ -518,7 +541,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd)
518 541
519 BUG_ON(!(nd->flags & LOOKUP_RCU)); 542 BUG_ON(!(nd->flags & LOOKUP_RCU));
520 nd->flags &= ~LOOKUP_RCU; 543 nd->flags &= ~LOOKUP_RCU;
521 nd->root.mnt = NULL; 544 if (!(nd->flags & LOOKUP_ROOT))
545 nd->root.mnt = NULL;
522 spin_lock(&dentry->d_lock); 546 spin_lock(&dentry->d_lock);
523 if (!__d_rcu_to_refcount(dentry, nd->seq)) 547 if (!__d_rcu_to_refcount(dentry, nd->seq))
524 goto err_unlock; 548 goto err_unlock;
@@ -539,14 +563,6 @@ err_unlock:
539 return -ECHILD; 563 return -ECHILD;
540} 564}
541 565
542/* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */
543static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
544{
545 if (likely(nd->flags & LOOKUP_RCU))
546 return nameidata_drop_rcu_last(nd);
547 return 0;
548}
549
550/** 566/**
551 * release_open_intent - free up open intent resources 567 * release_open_intent - free up open intent resources
552 * @nd: pointer to nameidata 568 * @nd: pointer to nameidata
@@ -590,42 +606,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
590 return dentry; 606 return dentry;
591} 607}
592 608
593static inline struct dentry *
594do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
595{
596 int status = d_revalidate(dentry, nd);
597 if (likely(status > 0))
598 return dentry;
599 if (status == -ECHILD) {
600 if (nameidata_dentry_drop_rcu(nd, dentry))
601 return ERR_PTR(-ECHILD);
602 return do_revalidate(dentry, nd);
603 }
604 if (status < 0)
605 return ERR_PTR(status);
606 /* Don't d_invalidate in rcu-walk mode */
607 if (nameidata_dentry_drop_rcu(nd, dentry))
608 return ERR_PTR(-ECHILD);
609 if (!d_invalidate(dentry)) {
610 dput(dentry);
611 dentry = NULL;
612 }
613 return dentry;
614}
615
616static inline int need_reval_dot(struct dentry *dentry)
617{
618 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
619 return 0;
620
621 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
622 return 0;
623
624 return 1;
625}
626
627/* 609/*
628 * force_reval_path - force revalidation of a dentry 610 * handle_reval_path - force revalidation of a dentry
629 * 611 *
630 * In some situations the path walking code will trust dentries without 612 * In some situations the path walking code will trust dentries without
631 * revalidating them. This causes problems for filesystems that depend on 613 * revalidating them. This causes problems for filesystems that depend on
@@ -639,27 +621,28 @@ static inline int need_reval_dot(struct dentry *dentry)
639 * invalidate the dentry. It's up to the caller to handle putting references 621 * invalidate the dentry. It's up to the caller to handle putting references
640 * to the path if necessary. 622 * to the path if necessary.
641 */ 623 */
642static int 624static inline int handle_reval_path(struct nameidata *nd)
643force_reval_path(struct path *path, struct nameidata *nd)
644{ 625{
626 struct dentry *dentry = nd->path.dentry;
645 int status; 627 int status;
646 struct dentry *dentry = path->dentry;
647 628
648 /* 629 if (likely(!(nd->flags & LOOKUP_JUMPED)))
649 * only check on filesystems where it's possible for the dentry to
650 * become stale.
651 */
652 if (!need_reval_dot(dentry))
653 return 0; 630 return 0;
654 631
632 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
633 return 0;
634
635 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
636 return 0;
637
638 /* Note: we do not d_invalidate() */
655 status = d_revalidate(dentry, nd); 639 status = d_revalidate(dentry, nd);
656 if (status > 0) 640 if (status > 0)
657 return 0; 641 return 0;
658 642
659 if (!status) { 643 if (!status)
660 d_invalidate(dentry);
661 status = -ESTALE; 644 status = -ESTALE;
662 } 645
663 return status; 646 return status;
664} 647}
665 648
@@ -675,6 +658,7 @@ force_reval_path(struct path *path, struct nameidata *nd)
675static inline int exec_permission(struct inode *inode, unsigned int flags) 658static inline int exec_permission(struct inode *inode, unsigned int flags)
676{ 659{
677 int ret; 660 int ret;
661 struct user_namespace *ns = inode_userns(inode);
678 662
679 if (inode->i_op->permission) { 663 if (inode->i_op->permission) {
680 ret = inode->i_op->permission(inode, MAY_EXEC, flags); 664 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
@@ -687,7 +671,8 @@ static inline int exec_permission(struct inode *inode, unsigned int flags)
687 if (ret == -ECHILD) 671 if (ret == -ECHILD)
688 return ret; 672 return ret;
689 673
690 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH)) 674 if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
675 ns_capable(ns, CAP_DAC_READ_SEARCH))
691 goto ok; 676 goto ok;
692 677
693 return ret; 678 return ret;
@@ -728,6 +713,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
728 path_put(&nd->path); 713 path_put(&nd->path);
729 nd->path = nd->root; 714 nd->path = nd->root;
730 path_get(&nd->root); 715 path_get(&nd->root);
716 nd->flags |= LOOKUP_JUMPED;
731 } 717 }
732 nd->inode = nd->path.dentry->d_inode; 718 nd->inode = nd->path.dentry->d_inode;
733 719
@@ -757,19 +743,42 @@ static inline void path_to_nameidata(const struct path *path,
757 nd->path.dentry = path->dentry; 743 nd->path.dentry = path->dentry;
758} 744}
759 745
746static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
747{
748 struct inode *inode = link->dentry->d_inode;
749 if (!IS_ERR(cookie) && inode->i_op->put_link)
750 inode->i_op->put_link(link->dentry, nd, cookie);
751 path_put(link);
752}
753
760static __always_inline int 754static __always_inline int
761__do_follow_link(const struct path *link, struct nameidata *nd, void **p) 755follow_link(struct path *link, struct nameidata *nd, void **p)
762{ 756{
763 int error; 757 int error;
764 struct dentry *dentry = link->dentry; 758 struct dentry *dentry = link->dentry;
765 759
766 BUG_ON(nd->flags & LOOKUP_RCU); 760 BUG_ON(nd->flags & LOOKUP_RCU);
767 761
762 if (link->mnt == nd->path.mnt)
763 mntget(link->mnt);
764
765 if (unlikely(current->total_link_count >= 40)) {
766 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
767 path_put(&nd->path);
768 return -ELOOP;
769 }
770 cond_resched();
771 current->total_link_count++;
772
768 touch_atime(link->mnt, dentry); 773 touch_atime(link->mnt, dentry);
769 nd_set_link(nd, NULL); 774 nd_set_link(nd, NULL);
770 775
771 if (link->mnt == nd->path.mnt) 776 error = security_inode_follow_link(link->dentry, nd);
772 mntget(link->mnt); 777 if (error) {
778 *p = ERR_PTR(error); /* no ->put_link(), please */
779 path_put(&nd->path);
780 return error;
781 }
773 782
774 nd->last_type = LAST_BIND; 783 nd->last_type = LAST_BIND;
775 *p = dentry->d_inode->i_op->follow_link(dentry, nd); 784 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
@@ -780,56 +789,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
780 if (s) 789 if (s)
781 error = __vfs_follow_link(nd, s); 790 error = __vfs_follow_link(nd, s);
782 else if (nd->last_type == LAST_BIND) { 791 else if (nd->last_type == LAST_BIND) {
783 error = force_reval_path(&nd->path, nd); 792 nd->flags |= LOOKUP_JUMPED;
784 if (error) 793 nd->inode = nd->path.dentry->d_inode;
794 if (nd->inode->i_op->follow_link) {
795 /* stepped on a _really_ weird one */
785 path_put(&nd->path); 796 path_put(&nd->path);
797 error = -ELOOP;
798 }
786 } 799 }
787 } 800 }
788 return error; 801 return error;
789} 802}
790 803
791/*
792 * This limits recursive symlink follows to 8, while
793 * limiting consecutive symlinks to 40.
794 *
795 * Without that kind of total limit, nasty chains of consecutive
796 * symlinks can cause almost arbitrarily long lookups.
797 */
798static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd)
799{
800 void *cookie;
801 int err = -ELOOP;
802
803 /* We drop rcu-walk here */
804 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
805 return -ECHILD;
806 BUG_ON(inode != path->dentry->d_inode);
807
808 if (current->link_count >= MAX_NESTED_LINKS)
809 goto loop;
810 if (current->total_link_count >= 40)
811 goto loop;
812 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
813 cond_resched();
814 err = security_inode_follow_link(path->dentry, nd);
815 if (err)
816 goto loop;
817 current->link_count++;
818 current->total_link_count++;
819 nd->depth++;
820 err = __do_follow_link(path, nd, &cookie);
821 if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link)
822 path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie);
823 path_put(path);
824 current->link_count--;
825 nd->depth--;
826 return err;
827loop:
828 path_put_conditional(path, nd);
829 path_put(&nd->path);
830 return err;
831}
832
833static int follow_up_rcu(struct path *path) 804static int follow_up_rcu(struct path *path)
834{ 805{
835 struct vfsmount *parent; 806 struct vfsmount *parent;
@@ -968,8 +939,7 @@ static int follow_managed(struct path *path, unsigned flags)
968 if (managed & DCACHE_MANAGE_TRANSIT) { 939 if (managed & DCACHE_MANAGE_TRANSIT) {
969 BUG_ON(!path->dentry->d_op); 940 BUG_ON(!path->dentry->d_op);
970 BUG_ON(!path->dentry->d_op->d_manage); 941 BUG_ON(!path->dentry->d_op->d_manage);
971 ret = path->dentry->d_op->d_manage(path->dentry, 942 ret = path->dentry->d_op->d_manage(path->dentry, false);
972 false, false);
973 if (ret < 0) 943 if (ret < 0)
974 return ret == -EISDIR ? 0 : ret; 944 return ret == -EISDIR ? 0 : ret;
975 } 945 }
@@ -1034,7 +1004,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
1034 struct vfsmount *mounted; 1004 struct vfsmount *mounted;
1035 if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && 1005 if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
1036 !reverse_transit && 1006 !reverse_transit &&
1037 path->dentry->d_op->d_manage(path->dentry, false, true) < 0) 1007 path->dentry->d_op->d_manage(path->dentry, true) < 0)
1038 return false; 1008 return false;
1039 mounted = __lookup_mnt(path->mnt, path->dentry, 1); 1009 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
1040 if (!mounted) 1010 if (!mounted)
@@ -1068,7 +1038,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1068 1038
1069 seq = read_seqcount_begin(&parent->d_seq); 1039 seq = read_seqcount_begin(&parent->d_seq);
1070 if (read_seqcount_retry(&old->d_seq, nd->seq)) 1040 if (read_seqcount_retry(&old->d_seq, nd->seq))
1071 return -ECHILD; 1041 goto failed;
1072 inode = parent->d_inode; 1042 inode = parent->d_inode;
1073 nd->path.dentry = parent; 1043 nd->path.dentry = parent;
1074 nd->seq = seq; 1044 nd->seq = seq;
@@ -1081,8 +1051,15 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1081 } 1051 }
1082 __follow_mount_rcu(nd, &nd->path, &inode, true); 1052 __follow_mount_rcu(nd, &nd->path, &inode, true);
1083 nd->inode = inode; 1053 nd->inode = inode;
1084
1085 return 0; 1054 return 0;
1055
1056failed:
1057 nd->flags &= ~LOOKUP_RCU;
1058 if (!(nd->flags & LOOKUP_ROOT))
1059 nd->root.mnt = NULL;
1060 rcu_read_unlock();
1061 br_read_unlock(vfsmount_lock);
1062 return -ECHILD;
1086} 1063}
1087 1064
1088/* 1065/*
@@ -1093,7 +1070,7 @@ static int follow_dotdot_rcu(struct nameidata *nd)
1093 * Care must be taken as namespace_sem may be held (indicated by mounting_here 1070 * Care must be taken as namespace_sem may be held (indicated by mounting_here
1094 * being true). 1071 * being true).
1095 */ 1072 */
1096int follow_down(struct path *path, bool mounting_here) 1073int follow_down(struct path *path)
1097{ 1074{
1098 unsigned managed; 1075 unsigned managed;
1099 int ret; 1076 int ret;
@@ -1114,7 +1091,7 @@ int follow_down(struct path *path, bool mounting_here)
1114 BUG_ON(!path->dentry->d_op); 1091 BUG_ON(!path->dentry->d_op);
1115 BUG_ON(!path->dentry->d_op->d_manage); 1092 BUG_ON(!path->dentry->d_op->d_manage);
1116 ret = path->dentry->d_op->d_manage( 1093 ret = path->dentry->d_op->d_manage(
1117 path->dentry, mounting_here, false); 1094 path->dentry, false);
1118 if (ret < 0) 1095 if (ret < 0)
1119 return ret == -EISDIR ? 0 : ret; 1096 return ret == -EISDIR ? 0 : ret;
1120 } 1097 }
@@ -1216,68 +1193,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1216{ 1193{
1217 struct vfsmount *mnt = nd->path.mnt; 1194 struct vfsmount *mnt = nd->path.mnt;
1218 struct dentry *dentry, *parent = nd->path.dentry; 1195 struct dentry *dentry, *parent = nd->path.dentry;
1219 struct inode *dir; 1196 int need_reval = 1;
1197 int status = 1;
1220 int err; 1198 int err;
1221 1199
1222 /* 1200 /*
1223 * See if the low-level filesystem might want
1224 * to use its own hash..
1225 */
1226 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1227 err = parent->d_op->d_hash(parent, nd->inode, name);
1228 if (err < 0)
1229 return err;
1230 }
1231
1232 /*
1233 * Rename seqlock is not required here because in the off chance 1201 * Rename seqlock is not required here because in the off chance
1234 * of a false negative due to a concurrent rename, we're going to 1202 * of a false negative due to a concurrent rename, we're going to
1235 * do the non-racy lookup, below. 1203 * do the non-racy lookup, below.
1236 */ 1204 */
1237 if (nd->flags & LOOKUP_RCU) { 1205 if (nd->flags & LOOKUP_RCU) {
1238 unsigned seq; 1206 unsigned seq;
1239
1240 *inode = nd->inode; 1207 *inode = nd->inode;
1241 dentry = __d_lookup_rcu(parent, name, &seq, inode); 1208 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1242 if (!dentry) { 1209 if (!dentry)
1243 if (nameidata_drop_rcu(nd)) 1210 goto unlazy;
1244 return -ECHILD; 1211
1245 goto need_lookup;
1246 }
1247 /* Memory barrier in read_seqcount_begin of child is enough */ 1212 /* Memory barrier in read_seqcount_begin of child is enough */
1248 if (__read_seqcount_retry(&parent->d_seq, nd->seq)) 1213 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1249 return -ECHILD; 1214 return -ECHILD;
1250
1251 nd->seq = seq; 1215 nd->seq = seq;
1216
1252 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1217 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1253 dentry = do_revalidate_rcu(dentry, nd); 1218 status = d_revalidate(dentry, nd);
1254 if (!dentry) 1219 if (unlikely(status <= 0)) {
1255 goto need_lookup; 1220 if (status != -ECHILD)
1256 if (IS_ERR(dentry)) 1221 need_reval = 0;
1257 goto fail; 1222 goto unlazy;
1258 if (!(nd->flags & LOOKUP_RCU)) 1223 }
1259 goto done;
1260 } 1224 }
1261 path->mnt = mnt; 1225 path->mnt = mnt;
1262 path->dentry = dentry; 1226 path->dentry = dentry;
1263 if (likely(__follow_mount_rcu(nd, path, inode, false))) 1227 if (likely(__follow_mount_rcu(nd, path, inode, false)))
1264 return 0; 1228 return 0;
1265 if (nameidata_drop_rcu(nd)) 1229unlazy:
1266 return -ECHILD; 1230 if (dentry) {
1267 /* fallthru */ 1231 if (nameidata_dentry_drop_rcu(nd, dentry))
1232 return -ECHILD;
1233 } else {
1234 if (nameidata_drop_rcu(nd))
1235 return -ECHILD;
1236 }
1237 } else {
1238 dentry = __d_lookup(parent, name);
1268 } 1239 }
1269 dentry = __d_lookup(parent, name); 1240
1270 if (!dentry) 1241retry:
1271 goto need_lookup; 1242 if (unlikely(!dentry)) {
1272found: 1243 struct inode *dir = parent->d_inode;
1273 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { 1244 BUG_ON(nd->inode != dir);
1274 dentry = do_revalidate(dentry, nd); 1245
1275 if (!dentry) 1246 mutex_lock(&dir->i_mutex);
1276 goto need_lookup; 1247 dentry = d_lookup(parent, name);
1277 if (IS_ERR(dentry)) 1248 if (likely(!dentry)) {
1278 goto fail; 1249 dentry = d_alloc_and_lookup(parent, name, nd);
1250 if (IS_ERR(dentry)) {
1251 mutex_unlock(&dir->i_mutex);
1252 return PTR_ERR(dentry);
1253 }
1254 /* known good */
1255 need_reval = 0;
1256 status = 1;
1257 }
1258 mutex_unlock(&dir->i_mutex);
1279 } 1259 }
1280done: 1260 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1261 status = d_revalidate(dentry, nd);
1262 if (unlikely(status <= 0)) {
1263 if (status < 0) {
1264 dput(dentry);
1265 return status;
1266 }
1267 if (!d_invalidate(dentry)) {
1268 dput(dentry);
1269 dentry = NULL;
1270 need_reval = 1;
1271 goto retry;
1272 }
1273 }
1274
1281 path->mnt = mnt; 1275 path->mnt = mnt;
1282 path->dentry = dentry; 1276 path->dentry = dentry;
1283 err = follow_managed(path, nd->flags); 1277 err = follow_managed(path, nd->flags);
@@ -1287,39 +1281,113 @@ done:
1287 } 1281 }
1288 *inode = path->dentry->d_inode; 1282 *inode = path->dentry->d_inode;
1289 return 0; 1283 return 0;
1284}
1285
1286static inline int may_lookup(struct nameidata *nd)
1287{
1288 if (nd->flags & LOOKUP_RCU) {
1289 int err = exec_permission(nd->inode, IPERM_FLAG_RCU);
1290 if (err != -ECHILD)
1291 return err;
1292 if (nameidata_drop_rcu(nd))
1293 return -ECHILD;
1294 }
1295 return exec_permission(nd->inode, 0);
1296}
1290 1297
1291need_lookup: 1298static inline int handle_dots(struct nameidata *nd, int type)
1292 dir = parent->d_inode; 1299{
1293 BUG_ON(nd->inode != dir); 1300 if (type == LAST_DOTDOT) {
1301 if (nd->flags & LOOKUP_RCU) {
1302 if (follow_dotdot_rcu(nd))
1303 return -ECHILD;
1304 } else
1305 follow_dotdot(nd);
1306 }
1307 return 0;
1308}
1294 1309
1295 mutex_lock(&dir->i_mutex); 1310static void terminate_walk(struct nameidata *nd)
1296 /* 1311{
1297 * First re-do the cached lookup just in case it was created 1312 if (!(nd->flags & LOOKUP_RCU)) {
1298 * while we waited for the directory semaphore, or the first 1313 path_put(&nd->path);
1299 * lookup failed due to an unrelated rename. 1314 } else {
1300 * 1315 nd->flags &= ~LOOKUP_RCU;
1301 * This could use version numbering or similar to avoid unnecessary 1316 if (!(nd->flags & LOOKUP_ROOT))
1302 * cache lookups, but then we'd have to do the first lookup in the 1317 nd->root.mnt = NULL;
1303 * non-racy way. However in the common case here, everything should 1318 rcu_read_unlock();
1304 * be hot in cache, so would it be a big win? 1319 br_read_unlock(vfsmount_lock);
1305 */
1306 dentry = d_lookup(parent, name);
1307 if (likely(!dentry)) {
1308 dentry = d_alloc_and_lookup(parent, name, nd);
1309 mutex_unlock(&dir->i_mutex);
1310 if (IS_ERR(dentry))
1311 goto fail;
1312 goto done;
1313 } 1320 }
1321}
1322
1323static inline int walk_component(struct nameidata *nd, struct path *path,
1324 struct qstr *name, int type, int follow)
1325{
1326 struct inode *inode;
1327 int err;
1314 /* 1328 /*
1315 * Uhhuh! Nasty case: the cache was re-populated while 1329 * "." and ".." are special - ".." especially so because it has
1316 * we waited on the semaphore. Need to revalidate. 1330 * to be able to know about the current root directory and
1331 * parent relationships.
1317 */ 1332 */
1318 mutex_unlock(&dir->i_mutex); 1333 if (unlikely(type != LAST_NORM))
1319 goto found; 1334 return handle_dots(nd, type);
1335 err = do_lookup(nd, name, path, &inode);
1336 if (unlikely(err)) {
1337 terminate_walk(nd);
1338 return err;
1339 }
1340 if (!inode) {
1341 path_to_nameidata(path, nd);
1342 terminate_walk(nd);
1343 return -ENOENT;
1344 }
1345 if (unlikely(inode->i_op->follow_link) && follow) {
1346 if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
1347 return -ECHILD;
1348 BUG_ON(inode != path->dentry->d_inode);
1349 return 1;
1350 }
1351 path_to_nameidata(path, nd);
1352 nd->inode = inode;
1353 return 0;
1354}
1320 1355
1321fail: 1356/*
1322 return PTR_ERR(dentry); 1357 * This limits recursive symlink follows to 8, while
1358 * limiting consecutive symlinks to 40.
1359 *
1360 * Without that kind of total limit, nasty chains of consecutive
1361 * symlinks can cause almost arbitrarily long lookups.
1362 */
1363static inline int nested_symlink(struct path *path, struct nameidata *nd)
1364{
1365 int res;
1366
1367 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1368 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1369 path_put_conditional(path, nd);
1370 path_put(&nd->path);
1371 return -ELOOP;
1372 }
1373
1374 nd->depth++;
1375 current->link_count++;
1376
1377 do {
1378 struct path link = *path;
1379 void *cookie;
1380
1381 res = follow_link(&link, nd, &cookie);
1382 if (!res)
1383 res = walk_component(nd, path, &nd->last,
1384 nd->last_type, LOOKUP_FOLLOW);
1385 put_link(nd, &link, cookie);
1386 } while (res > 0);
1387
1388 current->link_count--;
1389 nd->depth--;
1390 return res;
1323} 1391}
1324 1392
1325/* 1393/*
@@ -1339,30 +1407,18 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1339 while (*name=='/') 1407 while (*name=='/')
1340 name++; 1408 name++;
1341 if (!*name) 1409 if (!*name)
1342 goto return_reval; 1410 return 0;
1343
1344 if (nd->depth)
1345 lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE);
1346 1411
1347 /* At this point we know we have a real path component. */ 1412 /* At this point we know we have a real path component. */
1348 for(;;) { 1413 for(;;) {
1349 struct inode *inode;
1350 unsigned long hash; 1414 unsigned long hash;
1351 struct qstr this; 1415 struct qstr this;
1352 unsigned int c; 1416 unsigned int c;
1417 int type;
1353 1418
1354 nd->flags |= LOOKUP_CONTINUE; 1419 nd->flags |= LOOKUP_CONTINUE;
1355 if (nd->flags & LOOKUP_RCU) { 1420
1356 err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1421 err = may_lookup(nd);
1357 if (err == -ECHILD) {
1358 if (nameidata_drop_rcu(nd))
1359 return -ECHILD;
1360 goto exec_again;
1361 }
1362 } else {
1363exec_again:
1364 err = exec_permission(nd->inode, 0);
1365 }
1366 if (err) 1422 if (err)
1367 break; 1423 break;
1368 1424
@@ -1378,52 +1434,43 @@ exec_again:
1378 this.len = name - (const char *) this.name; 1434 this.len = name - (const char *) this.name;
1379 this.hash = end_name_hash(hash); 1435 this.hash = end_name_hash(hash);
1380 1436
1437 type = LAST_NORM;
1438 if (this.name[0] == '.') switch (this.len) {
1439 case 2:
1440 if (this.name[1] == '.') {
1441 type = LAST_DOTDOT;
1442 nd->flags |= LOOKUP_JUMPED;
1443 }
1444 break;
1445 case 1:
1446 type = LAST_DOT;
1447 }
1448 if (likely(type == LAST_NORM)) {
1449 struct dentry *parent = nd->path.dentry;
1450 nd->flags &= ~LOOKUP_JUMPED;
1451 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1452 err = parent->d_op->d_hash(parent, nd->inode,
1453 &this);
1454 if (err < 0)
1455 break;
1456 }
1457 }
1458
1381 /* remove trailing slashes? */ 1459 /* remove trailing slashes? */
1382 if (!c) 1460 if (!c)
1383 goto last_component; 1461 goto last_component;
1384 while (*++name == '/'); 1462 while (*++name == '/');
1385 if (!*name) 1463 if (!*name)
1386 goto last_with_slashes; 1464 goto last_component;
1387 1465
1388 /* 1466 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1389 * "." and ".." are special - ".." especially so because it has 1467 if (err < 0)
1390 * to be able to know about the current root directory and 1468 return err;
1391 * parent relationships.
1392 */
1393 if (this.name[0] == '.') switch (this.len) {
1394 default:
1395 break;
1396 case 2:
1397 if (this.name[1] != '.')
1398 break;
1399 if (nd->flags & LOOKUP_RCU) {
1400 if (follow_dotdot_rcu(nd))
1401 return -ECHILD;
1402 } else
1403 follow_dotdot(nd);
1404 /* fallthrough */
1405 case 1:
1406 continue;
1407 }
1408 /* This does the actual lookups.. */
1409 err = do_lookup(nd, &this, &next, &inode);
1410 if (err)
1411 break;
1412 err = -ENOENT;
1413 if (!inode)
1414 goto out_dput;
1415 1469
1416 if (inode->i_op->follow_link) { 1470 if (err) {
1417 err = do_follow_link(inode, &next, nd); 1471 err = nested_symlink(&next, nd);
1418 if (err) 1472 if (err)
1419 goto return_err; 1473 return err;
1420 nd->inode = nd->path.dentry->d_inode;
1421 err = -ENOENT;
1422 if (!nd->inode)
1423 break;
1424 } else {
1425 path_to_nameidata(&next, nd);
1426 nd->inode = inode;
1427 } 1474 }
1428 err = -ENOTDIR; 1475 err = -ENOTDIR;
1429 if (!nd->inode->i_op->lookup) 1476 if (!nd->inode->i_op->lookup)
@@ -1431,209 +1478,109 @@ exec_again:
1431 continue; 1478 continue;
1432 /* here ends the main loop */ 1479 /* here ends the main loop */
1433 1480
1434last_with_slashes:
1435 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1436last_component: 1481last_component:
1437 /* Clear LOOKUP_CONTINUE iff it was previously unset */ 1482 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1438 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; 1483 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1439 if (lookup_flags & LOOKUP_PARENT)
1440 goto lookup_parent;
1441 if (this.name[0] == '.') switch (this.len) {
1442 default:
1443 break;
1444 case 2:
1445 if (this.name[1] != '.')
1446 break;
1447 if (nd->flags & LOOKUP_RCU) {
1448 if (follow_dotdot_rcu(nd))
1449 return -ECHILD;
1450 } else
1451 follow_dotdot(nd);
1452 /* fallthrough */
1453 case 1:
1454 goto return_reval;
1455 }
1456 err = do_lookup(nd, &this, &next, &inode);
1457 if (err)
1458 break;
1459 if (inode && unlikely(inode->i_op->follow_link) &&
1460 (lookup_flags & LOOKUP_FOLLOW)) {
1461 err = do_follow_link(inode, &next, nd);
1462 if (err)
1463 goto return_err;
1464 nd->inode = nd->path.dentry->d_inode;
1465 } else {
1466 path_to_nameidata(&next, nd);
1467 nd->inode = inode;
1468 }
1469 err = -ENOENT;
1470 if (!nd->inode)
1471 break;
1472 if (lookup_flags & LOOKUP_DIRECTORY) {
1473 err = -ENOTDIR;
1474 if (!nd->inode->i_op->lookup)
1475 break;
1476 }
1477 goto return_base;
1478lookup_parent:
1479 nd->last = this; 1484 nd->last = this;
1480 nd->last_type = LAST_NORM; 1485 nd->last_type = type;
1481 if (this.name[0] != '.')
1482 goto return_base;
1483 if (this.len == 1)
1484 nd->last_type = LAST_DOT;
1485 else if (this.len == 2 && this.name[1] == '.')
1486 nd->last_type = LAST_DOTDOT;
1487 else
1488 goto return_base;
1489return_reval:
1490 /*
1491 * We bypassed the ordinary revalidation routines.
1492 * We may need to check the cached dentry for staleness.
1493 */
1494 if (need_reval_dot(nd->path.dentry)) {
1495 if (nameidata_drop_rcu_last_maybe(nd))
1496 return -ECHILD;
1497 /* Note: we do not d_invalidate() */
1498 err = d_revalidate(nd->path.dentry, nd);
1499 if (!err)
1500 err = -ESTALE;
1501 if (err < 0)
1502 break;
1503 return 0;
1504 }
1505return_base:
1506 if (nameidata_drop_rcu_last_maybe(nd))
1507 return -ECHILD;
1508 return 0; 1486 return 0;
1509out_dput:
1510 if (!(nd->flags & LOOKUP_RCU))
1511 path_put_conditional(&next, nd);
1512 break;
1513 } 1487 }
1514 if (!(nd->flags & LOOKUP_RCU)) 1488 terminate_walk(nd);
1515 path_put(&nd->path);
1516return_err:
1517 return err; 1489 return err;
1518} 1490}
1519 1491
1520static inline int path_walk_rcu(const char *name, struct nameidata *nd) 1492static int path_init(int dfd, const char *name, unsigned int flags,
1521{ 1493 struct nameidata *nd, struct file **fp)
1522 current->total_link_count = 0;
1523
1524 return link_path_walk(name, nd);
1525}
1526
1527static inline int path_walk_simple(const char *name, struct nameidata *nd)
1528{
1529 current->total_link_count = 0;
1530
1531 return link_path_walk(name, nd);
1532}
1533
1534static int path_walk(const char *name, struct nameidata *nd)
1535{
1536 struct path save = nd->path;
1537 int result;
1538
1539 current->total_link_count = 0;
1540
1541 /* make sure the stuff we saved doesn't go away */
1542 path_get(&save);
1543
1544 result = link_path_walk(name, nd);
1545 if (result == -ESTALE) {
1546 /* nd->path had been dropped */
1547 current->total_link_count = 0;
1548 nd->path = save;
1549 path_get(&nd->path);
1550 nd->flags |= LOOKUP_REVAL;
1551 result = link_path_walk(name, nd);
1552 }
1553
1554 path_put(&save);
1555
1556 return result;
1557}
1558
1559static void path_finish_rcu(struct nameidata *nd)
1560{
1561 if (nd->flags & LOOKUP_RCU) {
1562 /* RCU dangling. Cancel it. */
1563 nd->flags &= ~LOOKUP_RCU;
1564 nd->root.mnt = NULL;
1565 rcu_read_unlock();
1566 br_read_unlock(vfsmount_lock);
1567 }
1568 if (nd->file)
1569 fput(nd->file);
1570}
1571
1572static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
1573{ 1494{
1574 int retval = 0; 1495 int retval = 0;
1575 int fput_needed; 1496 int fput_needed;
1576 struct file *file; 1497 struct file *file;
1577 1498
1578 nd->last_type = LAST_ROOT; /* if there are only slashes... */ 1499 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1579 nd->flags = flags | LOOKUP_RCU; 1500 nd->flags = flags | LOOKUP_JUMPED;
1580 nd->depth = 0; 1501 nd->depth = 0;
1502 if (flags & LOOKUP_ROOT) {
1503 struct inode *inode = nd->root.dentry->d_inode;
1504 if (*name) {
1505 if (!inode->i_op->lookup)
1506 return -ENOTDIR;
1507 retval = inode_permission(inode, MAY_EXEC);
1508 if (retval)
1509 return retval;
1510 }
1511 nd->path = nd->root;
1512 nd->inode = inode;
1513 if (flags & LOOKUP_RCU) {
1514 br_read_lock(vfsmount_lock);
1515 rcu_read_lock();
1516 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1517 } else {
1518 path_get(&nd->path);
1519 }
1520 return 0;
1521 }
1522
1581 nd->root.mnt = NULL; 1523 nd->root.mnt = NULL;
1582 nd->file = NULL;
1583 1524
1584 if (*name=='/') { 1525 if (*name=='/') {
1585 struct fs_struct *fs = current->fs; 1526 if (flags & LOOKUP_RCU) {
1586 unsigned seq; 1527 br_read_lock(vfsmount_lock);
1587 1528 rcu_read_lock();
1588 br_read_lock(vfsmount_lock); 1529 set_root_rcu(nd);
1589 rcu_read_lock(); 1530 } else {
1590 1531 set_root(nd);
1591 do { 1532 path_get(&nd->root);
1592 seq = read_seqcount_begin(&fs->seq); 1533 }
1593 nd->root = fs->root; 1534 nd->path = nd->root;
1594 nd->path = nd->root;
1595 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1596 } while (read_seqcount_retry(&fs->seq, seq));
1597
1598 } else if (dfd == AT_FDCWD) { 1535 } else if (dfd == AT_FDCWD) {
1599 struct fs_struct *fs = current->fs; 1536 if (flags & LOOKUP_RCU) {
1600 unsigned seq; 1537 struct fs_struct *fs = current->fs;
1538 unsigned seq;
1601 1539
1602 br_read_lock(vfsmount_lock); 1540 br_read_lock(vfsmount_lock);
1603 rcu_read_lock(); 1541 rcu_read_lock();
1604
1605 do {
1606 seq = read_seqcount_begin(&fs->seq);
1607 nd->path = fs->pwd;
1608 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1609 } while (read_seqcount_retry(&fs->seq, seq));
1610 1542
1543 do {
1544 seq = read_seqcount_begin(&fs->seq);
1545 nd->path = fs->pwd;
1546 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1547 } while (read_seqcount_retry(&fs->seq, seq));
1548 } else {
1549 get_fs_pwd(current->fs, &nd->path);
1550 }
1611 } else { 1551 } else {
1612 struct dentry *dentry; 1552 struct dentry *dentry;
1613 1553
1614 file = fget_light(dfd, &fput_needed); 1554 file = fget_raw_light(dfd, &fput_needed);
1615 retval = -EBADF; 1555 retval = -EBADF;
1616 if (!file) 1556 if (!file)
1617 goto out_fail; 1557 goto out_fail;
1618 1558
1619 dentry = file->f_path.dentry; 1559 dentry = file->f_path.dentry;
1620 1560
1621 retval = -ENOTDIR; 1561 if (*name) {
1622 if (!S_ISDIR(dentry->d_inode->i_mode)) 1562 retval = -ENOTDIR;
1623 goto fput_fail; 1563 if (!S_ISDIR(dentry->d_inode->i_mode))
1564 goto fput_fail;
1624 1565
1625 retval = file_permission(file, MAY_EXEC); 1566 retval = file_permission(file, MAY_EXEC);
1626 if (retval) 1567 if (retval)
1627 goto fput_fail; 1568 goto fput_fail;
1569 }
1628 1570
1629 nd->path = file->f_path; 1571 nd->path = file->f_path;
1630 if (fput_needed) 1572 if (flags & LOOKUP_RCU) {
1631 nd->file = file; 1573 if (fput_needed)
1632 1574 *fp = file;
1633 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); 1575 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1634 br_read_lock(vfsmount_lock); 1576 br_read_lock(vfsmount_lock);
1635 rcu_read_lock(); 1577 rcu_read_lock();
1578 } else {
1579 path_get(&file->f_path);
1580 fput_light(file, fput_needed);
1581 }
1636 } 1582 }
1583
1637 nd->inode = nd->path.dentry->d_inode; 1584 nd->inode = nd->path.dentry->d_inode;
1638 return 0; 1585 return 0;
1639 1586
@@ -1643,60 +1590,23 @@ out_fail:
1643 return retval; 1590 return retval;
1644} 1591}
1645 1592
1646static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) 1593static inline int lookup_last(struct nameidata *nd, struct path *path)
1647{ 1594{
1648 int retval = 0; 1595 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1649 int fput_needed; 1596 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1650 struct file *file;
1651
1652 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1653 nd->flags = flags;
1654 nd->depth = 0;
1655 nd->root.mnt = NULL;
1656
1657 if (*name=='/') {
1658 set_root(nd);
1659 nd->path = nd->root;
1660 path_get(&nd->root);
1661 } else if (dfd == AT_FDCWD) {
1662 get_fs_pwd(current->fs, &nd->path);
1663 } else {
1664 struct dentry *dentry;
1665
1666 file = fget_light(dfd, &fput_needed);
1667 retval = -EBADF;
1668 if (!file)
1669 goto out_fail;
1670
1671 dentry = file->f_path.dentry;
1672
1673 retval = -ENOTDIR;
1674 if (!S_ISDIR(dentry->d_inode->i_mode))
1675 goto fput_fail;
1676
1677 retval = file_permission(file, MAY_EXEC);
1678 if (retval)
1679 goto fput_fail;
1680 1597
1681 nd->path = file->f_path; 1598 nd->flags &= ~LOOKUP_PARENT;
1682 path_get(&file->f_path); 1599 return walk_component(nd, path, &nd->last, nd->last_type,
1683 1600 nd->flags & LOOKUP_FOLLOW);
1684 fput_light(file, fput_needed);
1685 }
1686 nd->inode = nd->path.dentry->d_inode;
1687 return 0;
1688
1689fput_fail:
1690 fput_light(file, fput_needed);
1691out_fail:
1692 return retval;
1693} 1601}
1694 1602
1695/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1603/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1696static int do_path_lookup(int dfd, const char *name, 1604static int path_lookupat(int dfd, const char *name,
1697 unsigned int flags, struct nameidata *nd) 1605 unsigned int flags, struct nameidata *nd)
1698{ 1606{
1699 int retval; 1607 struct file *base = NULL;
1608 struct path path;
1609 int err;
1700 1610
1701 /* 1611 /*
1702 * Path walking is largely split up into 2 different synchronisation 1612 * Path walking is largely split up into 2 different synchronisation
@@ -1712,44 +1622,78 @@ static int do_path_lookup(int dfd, const char *name,
1712 * be handled by restarting a traditional ref-walk (which will always 1622 * be handled by restarting a traditional ref-walk (which will always
1713 * be able to complete). 1623 * be able to complete).
1714 */ 1624 */
1715 retval = path_init_rcu(dfd, name, flags, nd); 1625 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1716 if (unlikely(retval)) 1626
1717 return retval; 1627 if (unlikely(err))
1718 retval = path_walk_rcu(name, nd); 1628 return err;
1719 path_finish_rcu(nd); 1629
1720 if (nd->root.mnt) { 1630 current->total_link_count = 0;
1721 path_put(&nd->root); 1631 err = link_path_walk(name, nd);
1722 nd->root.mnt = NULL; 1632
1633 if (!err && !(flags & LOOKUP_PARENT)) {
1634 err = lookup_last(nd, &path);
1635 while (err > 0) {
1636 void *cookie;
1637 struct path link = path;
1638 nd->flags |= LOOKUP_PARENT;
1639 err = follow_link(&link, nd, &cookie);
1640 if (!err)
1641 err = lookup_last(nd, &path);
1642 put_link(nd, &link, cookie);
1643 }
1723 } 1644 }
1724 1645
1725 if (unlikely(retval == -ECHILD || retval == -ESTALE)) { 1646 if (nd->flags & LOOKUP_RCU) {
1726 /* slower, locked walk */ 1647 /* went all way through without dropping RCU */
1727 if (retval == -ESTALE) 1648 BUG_ON(err);
1728 flags |= LOOKUP_REVAL; 1649 if (nameidata_drop_rcu_last(nd))
1729 retval = path_init(dfd, name, flags, nd); 1650 err = -ECHILD;
1730 if (unlikely(retval)) 1651 }
1731 return retval; 1652
1732 retval = path_walk(name, nd); 1653 if (!err) {
1733 if (nd->root.mnt) { 1654 err = handle_reval_path(nd);
1734 path_put(&nd->root); 1655 if (err)
1735 nd->root.mnt = NULL; 1656 path_put(&nd->path);
1657 }
1658
1659 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1660 if (!nd->inode->i_op->lookup) {
1661 path_put(&nd->path);
1662 err = -ENOTDIR;
1736 } 1663 }
1737 } 1664 }
1738 1665
1666 if (base)
1667 fput(base);
1668
1669 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1670 path_put(&nd->root);
1671 nd->root.mnt = NULL;
1672 }
1673 return err;
1674}
1675
1676static int do_path_lookup(int dfd, const char *name,
1677 unsigned int flags, struct nameidata *nd)
1678{
1679 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1680 if (unlikely(retval == -ECHILD))
1681 retval = path_lookupat(dfd, name, flags, nd);
1682 if (unlikely(retval == -ESTALE))
1683 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1684
1739 if (likely(!retval)) { 1685 if (likely(!retval)) {
1740 if (unlikely(!audit_dummy_context())) { 1686 if (unlikely(!audit_dummy_context())) {
1741 if (nd->path.dentry && nd->inode) 1687 if (nd->path.dentry && nd->inode)
1742 audit_inode(name, nd->path.dentry); 1688 audit_inode(name, nd->path.dentry);
1743 } 1689 }
1744 } 1690 }
1745
1746 return retval; 1691 return retval;
1747} 1692}
1748 1693
1749int path_lookup(const char *name, unsigned int flags, 1694int kern_path_parent(const char *name, struct nameidata *nd)
1750 struct nameidata *nd)
1751{ 1695{
1752 return do_path_lookup(AT_FDCWD, name, flags, nd); 1696 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1753} 1697}
1754 1698
1755int kern_path(const char *name, unsigned int flags, struct path *path) 1699int kern_path(const char *name, unsigned int flags, struct path *path)
@@ -1773,29 +1717,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1773 const char *name, unsigned int flags, 1717 const char *name, unsigned int flags,
1774 struct nameidata *nd) 1718 struct nameidata *nd)
1775{ 1719{
1776 int retval; 1720 nd->root.dentry = dentry;
1777 1721 nd->root.mnt = mnt;
1778 /* same as do_path_lookup */ 1722 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1779 nd->last_type = LAST_ROOT; 1723 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1780 nd->flags = flags;
1781 nd->depth = 0;
1782
1783 nd->path.dentry = dentry;
1784 nd->path.mnt = mnt;
1785 path_get(&nd->path);
1786 nd->root = nd->path;
1787 path_get(&nd->root);
1788 nd->inode = nd->path.dentry->d_inode;
1789
1790 retval = path_walk(name, nd);
1791 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1792 nd->inode))
1793 audit_inode(name, nd->path.dentry);
1794
1795 path_put(&nd->root);
1796 nd->root.mnt = NULL;
1797
1798 return retval;
1799} 1724}
1800 1725
1801static struct dentry *__lookup_hash(struct qstr *name, 1726static struct dentry *__lookup_hash(struct qstr *name,
@@ -1810,17 +1735,6 @@ static struct dentry *__lookup_hash(struct qstr *name,
1810 return ERR_PTR(err); 1735 return ERR_PTR(err);
1811 1736
1812 /* 1737 /*
1813 * See if the low-level filesystem might want
1814 * to use its own hash..
1815 */
1816 if (base->d_flags & DCACHE_OP_HASH) {
1817 err = base->d_op->d_hash(base, inode, name);
1818 dentry = ERR_PTR(err);
1819 if (err < 0)
1820 goto out;
1821 }
1822
1823 /*
1824 * Don't bother with __d_lookup: callers are for creat as 1738 * Don't bother with __d_lookup: callers are for creat as
1825 * well as unlink, so a lot of the time it would cost 1739 * well as unlink, so a lot of the time it would cost
1826 * a double lookup. 1740 * a double lookup.
@@ -1832,7 +1746,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1832 1746
1833 if (!dentry) 1747 if (!dentry)
1834 dentry = d_alloc_and_lookup(base, name, nd); 1748 dentry = d_alloc_and_lookup(base, name, nd);
1835out: 1749
1836 return dentry; 1750 return dentry;
1837} 1751}
1838 1752
@@ -1846,28 +1760,6 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1846 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1760 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1847} 1761}
1848 1762
1849static int __lookup_one_len(const char *name, struct qstr *this,
1850 struct dentry *base, int len)
1851{
1852 unsigned long hash;
1853 unsigned int c;
1854
1855 this->name = name;
1856 this->len = len;
1857 if (!len)
1858 return -EACCES;
1859
1860 hash = init_name_hash();
1861 while (len--) {
1862 c = *(const unsigned char *)name++;
1863 if (c == '/' || c == '\0')
1864 return -EACCES;
1865 hash = partial_name_hash(c, hash);
1866 }
1867 this->hash = end_name_hash(hash);
1868 return 0;
1869}
1870
1871/** 1763/**
1872 * lookup_one_len - filesystem helper to lookup single pathname component 1764 * lookup_one_len - filesystem helper to lookup single pathname component
1873 * @name: pathname component to lookup 1765 * @name: pathname component to lookup
@@ -1881,14 +1773,34 @@ static int __lookup_one_len(const char *name, struct qstr *this,
1881 */ 1773 */
1882struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) 1774struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1883{ 1775{
1884 int err;
1885 struct qstr this; 1776 struct qstr this;
1777 unsigned long hash;
1778 unsigned int c;
1886 1779
1887 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); 1780 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1888 1781
1889 err = __lookup_one_len(name, &this, base, len); 1782 this.name = name;
1890 if (err) 1783 this.len = len;
1891 return ERR_PTR(err); 1784 if (!len)
1785 return ERR_PTR(-EACCES);
1786
1787 hash = init_name_hash();
1788 while (len--) {
1789 c = *(const unsigned char *)name++;
1790 if (c == '/' || c == '\0')
1791 return ERR_PTR(-EACCES);
1792 hash = partial_name_hash(c, hash);
1793 }
1794 this.hash = end_name_hash(hash);
1795 /*
1796 * See if the low-level filesystem might want
1797 * to use its own hash..
1798 */
1799 if (base->d_flags & DCACHE_OP_HASH) {
1800 int err = base->d_op->d_hash(base, base->d_inode, &this);
1801 if (err < 0)
1802 return ERR_PTR(err);
1803 }
1892 1804
1893 return __lookup_hash(&this, base, NULL); 1805 return __lookup_hash(&this, base, NULL);
1894} 1806}
@@ -1897,7 +1809,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags,
1897 struct path *path) 1809 struct path *path)
1898{ 1810{
1899 struct nameidata nd; 1811 struct nameidata nd;
1900 char *tmp = getname(name); 1812 char *tmp = getname_flags(name, flags);
1901 int err = PTR_ERR(tmp); 1813 int err = PTR_ERR(tmp);
1902 if (!IS_ERR(tmp)) { 1814 if (!IS_ERR(tmp)) {
1903 1815
@@ -1939,11 +1851,15 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
1939 1851
1940 if (!(dir->i_mode & S_ISVTX)) 1852 if (!(dir->i_mode & S_ISVTX))
1941 return 0; 1853 return 0;
1854 if (current_user_ns() != inode_userns(inode))
1855 goto other_userns;
1942 if (inode->i_uid == fsuid) 1856 if (inode->i_uid == fsuid)
1943 return 0; 1857 return 0;
1944 if (dir->i_uid == fsuid) 1858 if (dir->i_uid == fsuid)
1945 return 0; 1859 return 0;
1946 return !capable(CAP_FOWNER); 1860
1861other_userns:
1862 return !ns_capable(inode_userns(inode), CAP_FOWNER);
1947} 1863}
1948 1864
1949/* 1865/*
@@ -2077,12 +1993,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
2077 return error; 1993 return error;
2078} 1994}
2079 1995
2080int may_open(struct path *path, int acc_mode, int flag) 1996static int may_open(struct path *path, int acc_mode, int flag)
2081{ 1997{
2082 struct dentry *dentry = path->dentry; 1998 struct dentry *dentry = path->dentry;
2083 struct inode *inode = dentry->d_inode; 1999 struct inode *inode = dentry->d_inode;
2084 int error; 2000 int error;
2085 2001
2002 /* O_PATH? */
2003 if (!acc_mode)
2004 return 0;
2005
2086 if (!inode) 2006 if (!inode)
2087 return -ENOENT; 2007 return -ENOENT;
2088 2008
@@ -2119,7 +2039,7 @@ int may_open(struct path *path, int acc_mode, int flag)
2119 } 2039 }
2120 2040
2121 /* O_NOATIME can only be set by the owner or superuser */ 2041 /* O_NOATIME can only be set by the owner or superuser */
2122 if (flag & O_NOATIME && !is_owner_or_cap(inode)) 2042 if (flag & O_NOATIME && !inode_owner_or_capable(inode))
2123 return -EPERM; 2043 return -EPERM;
2124 2044
2125 /* 2045 /*
@@ -2151,34 +2071,6 @@ static int handle_truncate(struct file *filp)
2151} 2071}
2152 2072
2153/* 2073/*
2154 * Be careful about ever adding any more callers of this
2155 * function. Its flags must be in the namei format, not
2156 * what get passed to sys_open().
2157 */
2158static int __open_namei_create(struct nameidata *nd, struct path *path,
2159 int open_flag, int mode)
2160{
2161 int error;
2162 struct dentry *dir = nd->path.dentry;
2163
2164 if (!IS_POSIXACL(dir->d_inode))
2165 mode &= ~current_umask();
2166 error = security_path_mknod(&nd->path, path->dentry, mode, 0);
2167 if (error)
2168 goto out_unlock;
2169 error = vfs_create(dir->d_inode, path->dentry, mode, nd);
2170out_unlock:
2171 mutex_unlock(&dir->d_inode->i_mutex);
2172 dput(nd->path.dentry);
2173 nd->path.dentry = path->dentry;
2174
2175 if (error)
2176 return error;
2177 /* Don't check for write permission, don't truncate */
2178 return may_open(&nd->path, 0, open_flag & ~O_TRUNC);
2179}
2180
2181/*
2182 * Note that while the flag value (low two bits) for sys_open means: 2074 * Note that while the flag value (low two bits) for sys_open means:
2183 * 00 - read-only 2075 * 00 - read-only
2184 * 01 - write-only 2076 * 01 - write-only
@@ -2202,126 +2094,115 @@ static inline int open_to_namei_flags(int flag)
2202 return flag; 2094 return flag;
2203} 2095}
2204 2096
2205static int open_will_truncate(int flag, struct inode *inode)
2206{
2207 /*
2208 * We'll never write to the fs underlying
2209 * a device file.
2210 */
2211 if (special_file(inode->i_mode))
2212 return 0;
2213 return (flag & O_TRUNC);
2214}
2215
2216static struct file *finish_open(struct nameidata *nd,
2217 int open_flag, int acc_mode)
2218{
2219 struct file *filp;
2220 int will_truncate;
2221 int error;
2222
2223 will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode);
2224 if (will_truncate) {
2225 error = mnt_want_write(nd->path.mnt);
2226 if (error)
2227 goto exit;
2228 }
2229 error = may_open(&nd->path, acc_mode, open_flag);
2230 if (error) {
2231 if (will_truncate)
2232 mnt_drop_write(nd->path.mnt);
2233 goto exit;
2234 }
2235 filp = nameidata_to_filp(nd);
2236 if (!IS_ERR(filp)) {
2237 error = ima_file_check(filp, acc_mode);
2238 if (error) {
2239 fput(filp);
2240 filp = ERR_PTR(error);
2241 }
2242 }
2243 if (!IS_ERR(filp)) {
2244 if (will_truncate) {
2245 error = handle_truncate(filp);
2246 if (error) {
2247 fput(filp);
2248 filp = ERR_PTR(error);
2249 }
2250 }
2251 }
2252 /*
2253 * It is now safe to drop the mnt write
2254 * because the filp has had a write taken
2255 * on its behalf.
2256 */
2257 if (will_truncate)
2258 mnt_drop_write(nd->path.mnt);
2259 path_put(&nd->path);
2260 return filp;
2261
2262exit:
2263 path_put(&nd->path);
2264 return ERR_PTR(error);
2265}
2266
2267/* 2097/*
2268 * Handle O_CREAT case for do_filp_open 2098 * Handle the last step of open()
2269 */ 2099 */
2270static struct file *do_last(struct nameidata *nd, struct path *path, 2100static struct file *do_last(struct nameidata *nd, struct path *path,
2271 int open_flag, int acc_mode, 2101 const struct open_flags *op, const char *pathname)
2272 int mode, const char *pathname)
2273{ 2102{
2274 struct dentry *dir = nd->path.dentry; 2103 struct dentry *dir = nd->path.dentry;
2104 struct dentry *dentry;
2105 int open_flag = op->open_flag;
2106 int will_truncate = open_flag & O_TRUNC;
2107 int want_write = 0;
2108 int acc_mode = op->acc_mode;
2275 struct file *filp; 2109 struct file *filp;
2276 int error = -EISDIR; 2110 int error;
2111
2112 nd->flags &= ~LOOKUP_PARENT;
2113 nd->flags |= op->intent;
2277 2114
2278 switch (nd->last_type) { 2115 switch (nd->last_type) {
2279 case LAST_DOTDOT: 2116 case LAST_DOTDOT:
2280 follow_dotdot(nd);
2281 dir = nd->path.dentry;
2282 case LAST_DOT: 2117 case LAST_DOT:
2283 if (need_reval_dot(dir)) { 2118 error = handle_dots(nd, nd->last_type);
2284 int status = d_revalidate(nd->path.dentry, nd); 2119 if (error)
2285 if (!status) 2120 return ERR_PTR(error);
2286 status = -ESTALE;
2287 if (status < 0) {
2288 error = status;
2289 goto exit;
2290 }
2291 }
2292 /* fallthrough */ 2121 /* fallthrough */
2293 case LAST_ROOT: 2122 case LAST_ROOT:
2294 goto exit; 2123 if (nd->flags & LOOKUP_RCU) {
2124 if (nameidata_drop_rcu_last(nd))
2125 return ERR_PTR(-ECHILD);
2126 }
2127 error = handle_reval_path(nd);
2128 if (error)
2129 goto exit;
2130 audit_inode(pathname, nd->path.dentry);
2131 if (open_flag & O_CREAT) {
2132 error = -EISDIR;
2133 goto exit;
2134 }
2135 goto ok;
2295 case LAST_BIND: 2136 case LAST_BIND:
2137 /* can't be RCU mode here */
2138 error = handle_reval_path(nd);
2139 if (error)
2140 goto exit;
2296 audit_inode(pathname, dir); 2141 audit_inode(pathname, dir);
2297 goto ok; 2142 goto ok;
2298 } 2143 }
2299 2144
2145 if (!(open_flag & O_CREAT)) {
2146 int symlink_ok = 0;
2147 if (nd->last.name[nd->last.len])
2148 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2149 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2150 symlink_ok = 1;
2151 /* we _can_ be in RCU mode here */
2152 error = walk_component(nd, path, &nd->last, LAST_NORM,
2153 !symlink_ok);
2154 if (error < 0)
2155 return ERR_PTR(error);
2156 if (error) /* symlink */
2157 return NULL;
2158 /* sayonara */
2159 if (nd->flags & LOOKUP_RCU) {
2160 if (nameidata_drop_rcu_last(nd))
2161 return ERR_PTR(-ECHILD);
2162 }
2163
2164 error = -ENOTDIR;
2165 if (nd->flags & LOOKUP_DIRECTORY) {
2166 if (!nd->inode->i_op->lookup)
2167 goto exit;
2168 }
2169 audit_inode(pathname, nd->path.dentry);
2170 goto ok;
2171 }
2172
2173 /* create side of things */
2174
2175 if (nd->flags & LOOKUP_RCU) {
2176 if (nameidata_drop_rcu_last(nd))
2177 return ERR_PTR(-ECHILD);
2178 }
2179
2180 audit_inode(pathname, dir);
2181 error = -EISDIR;
2300 /* trailing slashes? */ 2182 /* trailing slashes? */
2301 if (nd->last.name[nd->last.len]) 2183 if (nd->last.name[nd->last.len])
2302 goto exit; 2184 goto exit;
2303 2185
2304 mutex_lock(&dir->d_inode->i_mutex); 2186 mutex_lock(&dir->d_inode->i_mutex);
2305 2187
2306 path->dentry = lookup_hash(nd); 2188 dentry = lookup_hash(nd);
2307 path->mnt = nd->path.mnt; 2189 error = PTR_ERR(dentry);
2308 2190 if (IS_ERR(dentry)) {
2309 error = PTR_ERR(path->dentry);
2310 if (IS_ERR(path->dentry)) {
2311 mutex_unlock(&dir->d_inode->i_mutex); 2191 mutex_unlock(&dir->d_inode->i_mutex);
2312 goto exit; 2192 goto exit;
2313 } 2193 }
2314 2194
2315 if (IS_ERR(nd->intent.open.file)) { 2195 path->dentry = dentry;
2316 error = PTR_ERR(nd->intent.open.file); 2196 path->mnt = nd->path.mnt;
2317 goto exit_mutex_unlock;
2318 }
2319 2197
2320 /* Negative dentry, just create the file */ 2198 /* Negative dentry, just create the file */
2321 if (!path->dentry->d_inode) { 2199 if (!dentry->d_inode) {
2200 int mode = op->mode;
2201 if (!IS_POSIXACL(dir->d_inode))
2202 mode &= ~current_umask();
2322 /* 2203 /*
2323 * This write is needed to ensure that a 2204 * This write is needed to ensure that a
2324 * ro->rw transition does not occur between 2205 * rw->ro transition does not occur between
2325 * the time when the file is created and when 2206 * the time when the file is created and when
2326 * a permanent write count is taken through 2207 * a permanent write count is taken through
2327 * the 'struct file' in nameidata_to_filp(). 2208 * the 'struct file' in nameidata_to_filp().
@@ -2329,22 +2210,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2329 error = mnt_want_write(nd->path.mnt); 2210 error = mnt_want_write(nd->path.mnt);
2330 if (error) 2211 if (error)
2331 goto exit_mutex_unlock; 2212 goto exit_mutex_unlock;
2332 error = __open_namei_create(nd, path, open_flag, mode); 2213 want_write = 1;
2333 if (error) { 2214 /* Don't check for write permission, don't truncate */
2334 mnt_drop_write(nd->path.mnt); 2215 open_flag &= ~O_TRUNC;
2335 goto exit; 2216 will_truncate = 0;
2336 } 2217 acc_mode = MAY_OPEN;
2337 filp = nameidata_to_filp(nd); 2218 error = security_path_mknod(&nd->path, dentry, mode, 0);
2338 mnt_drop_write(nd->path.mnt); 2219 if (error)
2339 path_put(&nd->path); 2220 goto exit_mutex_unlock;
2340 if (!IS_ERR(filp)) { 2221 error = vfs_create(dir->d_inode, dentry, mode, nd);
2341 error = ima_file_check(filp, acc_mode); 2222 if (error)
2342 if (error) { 2223 goto exit_mutex_unlock;
2343 fput(filp); 2224 mutex_unlock(&dir->d_inode->i_mutex);
2344 filp = ERR_PTR(error); 2225 dput(nd->path.dentry);
2345 } 2226 nd->path.dentry = dentry;
2346 } 2227 goto common;
2347 return filp;
2348 } 2228 }
2349 2229
2350 /* 2230 /*
@@ -2374,7 +2254,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
2374 if (S_ISDIR(nd->inode->i_mode)) 2254 if (S_ISDIR(nd->inode->i_mode))
2375 goto exit; 2255 goto exit;
2376ok: 2256ok:
2377 filp = finish_open(nd, open_flag, acc_mode); 2257 if (!S_ISREG(nd->inode->i_mode))
2258 will_truncate = 0;
2259
2260 if (will_truncate) {
2261 error = mnt_want_write(nd->path.mnt);
2262 if (error)
2263 goto exit;
2264 want_write = 1;
2265 }
2266common:
2267 error = may_open(&nd->path, acc_mode, open_flag);
2268 if (error)
2269 goto exit;
2270 filp = nameidata_to_filp(nd);
2271 if (!IS_ERR(filp)) {
2272 error = ima_file_check(filp, op->acc_mode);
2273 if (error) {
2274 fput(filp);
2275 filp = ERR_PTR(error);
2276 }
2277 }
2278 if (!IS_ERR(filp)) {
2279 if (will_truncate) {
2280 error = handle_truncate(filp);
2281 if (error) {
2282 fput(filp);
2283 filp = ERR_PTR(error);
2284 }
2285 }
2286 }
2287out:
2288 if (want_write)
2289 mnt_drop_write(nd->path.mnt);
2290 path_put(&nd->path);
2378 return filp; 2291 return filp;
2379 2292
2380exit_mutex_unlock: 2293exit_mutex_unlock:
@@ -2382,197 +2295,103 @@ exit_mutex_unlock:
2382exit_dput: 2295exit_dput:
2383 path_put_conditional(path, nd); 2296 path_put_conditional(path, nd);
2384exit: 2297exit:
2385 path_put(&nd->path); 2298 filp = ERR_PTR(error);
2386 return ERR_PTR(error); 2299 goto out;
2387} 2300}
2388 2301
2389/* 2302static struct file *path_openat(int dfd, const char *pathname,
2390 * Note that the low bits of the passed in "open_flag" 2303 struct nameidata *nd, const struct open_flags *op, int flags)
2391 * are not the same as in the local variable "flag". See
2392 * open_to_namei_flags() for more details.
2393 */
2394struct file *do_filp_open(int dfd, const char *pathname,
2395 int open_flag, int mode, int acc_mode)
2396{ 2304{
2305 struct file *base = NULL;
2397 struct file *filp; 2306 struct file *filp;
2398 struct nameidata nd;
2399 int error;
2400 struct path path; 2307 struct path path;
2401 int count = 0; 2308 int error;
2402 int flag = open_to_namei_flags(open_flag);
2403 int flags;
2404
2405 if (!(open_flag & O_CREAT))
2406 mode = 0;
2407
2408 /* Must never be set by userspace */
2409 open_flag &= ~FMODE_NONOTIFY;
2410
2411 /*
2412 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
2413 * check for O_DSYNC if the need any syncing at all we enforce it's
2414 * always set instead of having to deal with possibly weird behaviour
2415 * for malicious applications setting only __O_SYNC.
2416 */
2417 if (open_flag & __O_SYNC)
2418 open_flag |= O_DSYNC;
2419
2420 if (!acc_mode)
2421 acc_mode = MAY_OPEN | ACC_MODE(open_flag);
2422
2423 /* O_TRUNC implies we need access checks for write permissions */
2424 if (open_flag & O_TRUNC)
2425 acc_mode |= MAY_WRITE;
2426
2427 /* Allow the LSM permission hook to distinguish append
2428 access from general write access. */
2429 if (open_flag & O_APPEND)
2430 acc_mode |= MAY_APPEND;
2431
2432 flags = LOOKUP_OPEN;
2433 if (open_flag & O_CREAT) {
2434 flags |= LOOKUP_CREATE;
2435 if (open_flag & O_EXCL)
2436 flags |= LOOKUP_EXCL;
2437 }
2438 if (open_flag & O_DIRECTORY)
2439 flags |= LOOKUP_DIRECTORY;
2440 if (!(open_flag & O_NOFOLLOW))
2441 flags |= LOOKUP_FOLLOW;
2442 2309
2443 filp = get_empty_filp(); 2310 filp = get_empty_filp();
2444 if (!filp) 2311 if (!filp)
2445 return ERR_PTR(-ENFILE); 2312 return ERR_PTR(-ENFILE);
2446 2313
2447 filp->f_flags = open_flag; 2314 filp->f_flags = op->open_flag;
2448 nd.intent.open.file = filp; 2315 nd->intent.open.file = filp;
2449 nd.intent.open.flags = flag; 2316 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2450 nd.intent.open.create_mode = mode; 2317 nd->intent.open.create_mode = op->mode;
2451 2318
2452 if (open_flag & O_CREAT) 2319 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2453 goto creat;
2454
2455 /* !O_CREAT, simple open */
2456 error = do_path_lookup(dfd, pathname, flags, &nd);
2457 if (unlikely(error)) 2320 if (unlikely(error))
2458 goto out_filp; 2321 goto out_filp;
2459 error = -ELOOP;
2460 if (!(nd.flags & LOOKUP_FOLLOW)) {
2461 if (nd.inode->i_op->follow_link)
2462 goto out_path;
2463 }
2464 error = -ENOTDIR;
2465 if (nd.flags & LOOKUP_DIRECTORY) {
2466 if (!nd.inode->i_op->lookup)
2467 goto out_path;
2468 }
2469 audit_inode(pathname, nd.path.dentry);
2470 filp = finish_open(&nd, open_flag, acc_mode);
2471 release_open_intent(&nd);
2472 return filp;
2473
2474creat:
2475 /* OK, have to create the file. Find the parent. */
2476 error = path_init_rcu(dfd, pathname,
2477 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2478 if (error)
2479 goto out_filp;
2480 error = path_walk_rcu(pathname, &nd);
2481 path_finish_rcu(&nd);
2482 if (unlikely(error == -ECHILD || error == -ESTALE)) {
2483 /* slower, locked walk */
2484 if (error == -ESTALE) {
2485reval:
2486 flags |= LOOKUP_REVAL;
2487 }
2488 error = path_init(dfd, pathname,
2489 LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd);
2490 if (error)
2491 goto out_filp;
2492 2322
2493 error = path_walk_simple(pathname, &nd); 2323 current->total_link_count = 0;
2494 } 2324 error = link_path_walk(pathname, nd);
2495 if (unlikely(error)) 2325 if (unlikely(error))
2496 goto out_filp; 2326 goto out_filp;
2497 if (unlikely(!audit_dummy_context()))
2498 audit_inode(pathname, nd.path.dentry);
2499 2327
2500 /* 2328 filp = do_last(nd, &path, op, pathname);
2501 * We have the parent and last component.
2502 */
2503 nd.flags = flags;
2504 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
2505 while (unlikely(!filp)) { /* trailing symlink */ 2329 while (unlikely(!filp)) { /* trailing symlink */
2506 struct path link = path; 2330 struct path link = path;
2507 struct inode *linki = link.dentry->d_inode;
2508 void *cookie; 2331 void *cookie;
2509 error = -ELOOP; 2332 if (!(nd->flags & LOOKUP_FOLLOW)) {
2510 if (!(nd.flags & LOOKUP_FOLLOW)) 2333 path_put_conditional(&path, nd);
2511 goto exit_dput; 2334 path_put(&nd->path);
2512 if (count++ == 32) 2335 filp = ERR_PTR(-ELOOP);
2513 goto exit_dput; 2336 break;
2514 /*
2515 * This is subtle. Instead of calling do_follow_link() we do
2516 * the thing by hands. The reason is that this way we have zero
2517 * link_count and path_walk() (called from ->follow_link)
2518 * honoring LOOKUP_PARENT. After that we have the parent and
2519 * last component, i.e. we are in the same situation as after
2520 * the first path_walk(). Well, almost - if the last component
2521 * is normal we get its copy stored in nd->last.name and we will
2522 * have to putname() it when we are done. Procfs-like symlinks
2523 * just set LAST_BIND.
2524 */
2525 nd.flags |= LOOKUP_PARENT;
2526 error = security_inode_follow_link(link.dentry, &nd);
2527 if (error)
2528 goto exit_dput;
2529 error = __do_follow_link(&link, &nd, &cookie);
2530 if (unlikely(error)) {
2531 if (!IS_ERR(cookie) && linki->i_op->put_link)
2532 linki->i_op->put_link(link.dentry, &nd, cookie);
2533 /* nd.path had been dropped */
2534 nd.path = link;
2535 goto out_path;
2536 } 2337 }
2537 nd.flags &= ~LOOKUP_PARENT; 2338 nd->flags |= LOOKUP_PARENT;
2538 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 2339 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2539 if (linki->i_op->put_link) 2340 error = follow_link(&link, nd, &cookie);
2540 linki->i_op->put_link(link.dentry, &nd, cookie); 2341 if (unlikely(error))
2541 path_put(&link); 2342 filp = ERR_PTR(error);
2343 else
2344 filp = do_last(nd, &path, op, pathname);
2345 put_link(nd, &link, cookie);
2542 } 2346 }
2543out: 2347out:
2544 if (nd.root.mnt) 2348 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2545 path_put(&nd.root); 2349 path_put(&nd->root);
2546 if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) 2350 if (base)
2547 goto reval; 2351 fput(base);
2548 release_open_intent(&nd); 2352 release_open_intent(nd);
2549 return filp; 2353 return filp;
2550 2354
2551exit_dput:
2552 path_put_conditional(&path, &nd);
2553out_path:
2554 path_put(&nd.path);
2555out_filp: 2355out_filp:
2556 filp = ERR_PTR(error); 2356 filp = ERR_PTR(error);
2557 goto out; 2357 goto out;
2558} 2358}
2559 2359
2560/** 2360struct file *do_filp_open(int dfd, const char *pathname,
2561 * filp_open - open file and return file pointer 2361 const struct open_flags *op, int flags)
2562 *
2563 * @filename: path to open
2564 * @flags: open flags as per the open(2) second argument
2565 * @mode: mode for the new file if O_CREAT is set, else ignored
2566 *
2567 * This is the helper to open a file from kernelspace if you really
2568 * have to. But in generally you should not do this, so please move
2569 * along, nothing to see here..
2570 */
2571struct file *filp_open(const char *filename, int flags, int mode)
2572{ 2362{
2573 return do_filp_open(AT_FDCWD, filename, flags, mode, 0); 2363 struct nameidata nd;
2364 struct file *filp;
2365
2366 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2367 if (unlikely(filp == ERR_PTR(-ECHILD)))
2368 filp = path_openat(dfd, pathname, &nd, op, flags);
2369 if (unlikely(filp == ERR_PTR(-ESTALE)))
2370 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2371 return filp;
2372}
2373
2374struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2375 const char *name, const struct open_flags *op, int flags)
2376{
2377 struct nameidata nd;
2378 struct file *file;
2379
2380 nd.root.mnt = mnt;
2381 nd.root.dentry = dentry;
2382
2383 flags |= LOOKUP_ROOT;
2384
2385 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2386 return ERR_PTR(-ELOOP);
2387
2388 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2389 if (unlikely(file == ERR_PTR(-ECHILD)))
2390 file = path_openat(-1, name, &nd, op, flags);
2391 if (unlikely(file == ERR_PTR(-ESTALE)))
2392 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2393 return file;
2574} 2394}
2575EXPORT_SYMBOL(filp_open);
2576 2395
2577/** 2396/**
2578 * lookup_create - lookup a dentry, creating it if it doesn't exist 2397 * lookup_create - lookup a dentry, creating it if it doesn't exist
@@ -2634,7 +2453,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2634 if (error) 2453 if (error)
2635 return error; 2454 return error;
2636 2455
2637 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) 2456 if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
2457 !ns_capable(inode_userns(dir), CAP_MKNOD))
2638 return -EPERM; 2458 return -EPERM;
2639 2459
2640 if (!dir->i_op->mknod) 2460 if (!dir->i_op->mknod)
@@ -3111,7 +2931,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
3111 return error; 2931 return error;
3112 2932
3113 mutex_lock(&inode->i_mutex); 2933 mutex_lock(&inode->i_mutex);
3114 error = dir->i_op->link(old_dentry, dir, new_dentry); 2934 /* Make sure we don't allow creating hardlink to an unlinked file */
2935 if (inode->i_nlink == 0)
2936 error = -ENOENT;
2937 else
2938 error = dir->i_op->link(old_dentry, dir, new_dentry);
3115 mutex_unlock(&inode->i_mutex); 2939 mutex_unlock(&inode->i_mutex);
3116 if (!error) 2940 if (!error)
3117 fsnotify_link(dir, inode, new_dentry); 2941 fsnotify_link(dir, inode, new_dentry);
@@ -3133,15 +2957,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
3133 struct dentry *new_dentry; 2957 struct dentry *new_dentry;
3134 struct nameidata nd; 2958 struct nameidata nd;
3135 struct path old_path; 2959 struct path old_path;
2960 int how = 0;
3136 int error; 2961 int error;
3137 char *to; 2962 char *to;
3138 2963
3139 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2964 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
3140 return -EINVAL; 2965 return -EINVAL;
2966 /*
2967 * To use null names we require CAP_DAC_READ_SEARCH
2968 * This ensures that not everyone will be able to create
2969 * handlink using the passed filedescriptor.
2970 */
2971 if (flags & AT_EMPTY_PATH) {
2972 if (!capable(CAP_DAC_READ_SEARCH))
2973 return -ENOENT;
2974 how = LOOKUP_EMPTY;
2975 }
2976
2977 if (flags & AT_SYMLINK_FOLLOW)
2978 how |= LOOKUP_FOLLOW;
3141 2979
3142 error = user_path_at(olddfd, oldname, 2980 error = user_path_at(olddfd, oldname, how, &old_path);
3143 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
3144 &old_path);
3145 if (error) 2981 if (error)
3146 return error; 2982 return error;
3147 2983
@@ -3578,7 +3414,7 @@ EXPORT_SYMBOL(page_readlink);
3578EXPORT_SYMBOL(__page_symlink); 3414EXPORT_SYMBOL(__page_symlink);
3579EXPORT_SYMBOL(page_symlink); 3415EXPORT_SYMBOL(page_symlink);
3580EXPORT_SYMBOL(page_symlink_inode_operations); 3416EXPORT_SYMBOL(page_symlink_inode_operations);
3581EXPORT_SYMBOL(path_lookup); 3417EXPORT_SYMBOL(kern_path_parent);
3582EXPORT_SYMBOL(kern_path); 3418EXPORT_SYMBOL(kern_path);
3583EXPORT_SYMBOL(vfs_path_lookup); 3419EXPORT_SYMBOL(vfs_path_lookup);
3584EXPORT_SYMBOL(inode_permission); 3420EXPORT_SYMBOL(inode_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index d1edf26025dc..7dba2ed03429 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -196,7 +196,7 @@ unsigned int mnt_get_count(struct vfsmount *mnt)
196#endif 196#endif
197} 197}
198 198
199struct vfsmount *alloc_vfsmnt(const char *name) 199static struct vfsmount *alloc_vfsmnt(const char *name)
200{ 200{
201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 201 struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
202 if (mnt) { 202 if (mnt) {
@@ -466,15 +466,7 @@ static void __mnt_unmake_readonly(struct vfsmount *mnt)
466 br_write_unlock(vfsmount_lock); 466 br_write_unlock(vfsmount_lock);
467} 467}
468 468
469void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) 469static void free_vfsmnt(struct vfsmount *mnt)
470{
471 mnt->mnt_sb = sb;
472 mnt->mnt_root = dget(sb->s_root);
473}
474
475EXPORT_SYMBOL(simple_set_mnt);
476
477void free_vfsmnt(struct vfsmount *mnt)
478{ 470{
479 kfree(mnt->mnt_devname); 471 kfree(mnt->mnt_devname);
480 mnt_free_id(mnt); 472 mnt_free_id(mnt);
@@ -678,6 +670,36 @@ static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
678 return p; 670 return p;
679} 671}
680 672
673struct vfsmount *
674vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
675{
676 struct vfsmount *mnt;
677 struct dentry *root;
678
679 if (!type)
680 return ERR_PTR(-ENODEV);
681
682 mnt = alloc_vfsmnt(name);
683 if (!mnt)
684 return ERR_PTR(-ENOMEM);
685
686 if (flags & MS_KERNMOUNT)
687 mnt->mnt_flags = MNT_INTERNAL;
688
689 root = mount_fs(type, flags, name, data);
690 if (IS_ERR(root)) {
691 free_vfsmnt(mnt);
692 return ERR_CAST(root);
693 }
694
695 mnt->mnt_root = root;
696 mnt->mnt_sb = root->d_sb;
697 mnt->mnt_mountpoint = mnt->mnt_root;
698 mnt->mnt_parent = mnt;
699 return mnt;
700}
701EXPORT_SYMBOL_GPL(vfs_kern_mount);
702
681static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, 703static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
682 int flag) 704 int flag)
683{ 705{
@@ -978,7 +1000,13 @@ static int show_vfsmnt(struct seq_file *m, void *v)
978 int err = 0; 1000 int err = 0;
979 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; 1001 struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
980 1002
981 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); 1003 if (mnt->mnt_sb->s_op->show_devname) {
1004 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1005 if (err)
1006 goto out;
1007 } else {
1008 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
1009 }
982 seq_putc(m, ' '); 1010 seq_putc(m, ' ');
983 seq_path(m, &mnt_path, " \t\n\\"); 1011 seq_path(m, &mnt_path, " \t\n\\");
984 seq_putc(m, ' '); 1012 seq_putc(m, ' ');
@@ -1002,6 +1030,18 @@ const struct seq_operations mounts_op = {
1002 .show = show_vfsmnt 1030 .show = show_vfsmnt
1003}; 1031};
1004 1032
1033static int uuid_is_nil(u8 *uuid)
1034{
1035 int i;
1036 u8 *cp = (u8 *)uuid;
1037
1038 for (i = 0; i < 16; i++) {
1039 if (*cp++)
1040 return 0;
1041 }
1042 return 1;
1043}
1044
1005static int show_mountinfo(struct seq_file *m, void *v) 1045static int show_mountinfo(struct seq_file *m, void *v)
1006{ 1046{
1007 struct proc_mounts *p = m->private; 1047 struct proc_mounts *p = m->private;
@@ -1013,7 +1053,12 @@ static int show_mountinfo(struct seq_file *m, void *v)
1013 1053
1014 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id, 1054 seq_printf(m, "%i %i %u:%u ", mnt->mnt_id, mnt->mnt_parent->mnt_id,
1015 MAJOR(sb->s_dev), MINOR(sb->s_dev)); 1055 MAJOR(sb->s_dev), MINOR(sb->s_dev));
1016 seq_dentry(m, mnt->mnt_root, " \t\n\\"); 1056 if (sb->s_op->show_path)
1057 err = sb->s_op->show_path(m, mnt);
1058 else
1059 seq_dentry(m, mnt->mnt_root, " \t\n\\");
1060 if (err)
1061 goto out;
1017 seq_putc(m, ' '); 1062 seq_putc(m, ' ');
1018 seq_path_root(m, &mnt_path, &root, " \t\n\\"); 1063 seq_path_root(m, &mnt_path, &root, " \t\n\\");
1019 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) { 1064 if (root.mnt != p->root.mnt || root.dentry != p->root.dentry) {
@@ -1040,11 +1085,20 @@ static int show_mountinfo(struct seq_file *m, void *v)
1040 if (IS_MNT_UNBINDABLE(mnt)) 1085 if (IS_MNT_UNBINDABLE(mnt))
1041 seq_puts(m, " unbindable"); 1086 seq_puts(m, " unbindable");
1042 1087
1088 if (!uuid_is_nil(mnt->mnt_sb->s_uuid))
1089 /* print the uuid */
1090 seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid);
1091
1043 /* Filesystem specific data */ 1092 /* Filesystem specific data */
1044 seq_puts(m, " - "); 1093 seq_puts(m, " - ");
1045 show_type(m, sb); 1094 show_type(m, sb);
1046 seq_putc(m, ' '); 1095 seq_putc(m, ' ');
1047 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none"); 1096 if (sb->s_op->show_devname)
1097 err = sb->s_op->show_devname(m, mnt);
1098 else
1099 mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
1100 if (err)
1101 goto out;
1048 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw"); 1102 seq_puts(m, sb->s_flags & MS_RDONLY ? " ro" : " rw");
1049 err = show_sb_opts(m, sb); 1103 err = show_sb_opts(m, sb);
1050 if (err) 1104 if (err)
@@ -1070,11 +1124,15 @@ static int show_vfsstat(struct seq_file *m, void *v)
1070 int err = 0; 1124 int err = 0;
1071 1125
1072 /* device */ 1126 /* device */
1073 if (mnt->mnt_devname) { 1127 if (mnt->mnt_sb->s_op->show_devname) {
1074 seq_puts(m, "device "); 1128 err = mnt->mnt_sb->s_op->show_devname(m, mnt);
1075 mangle(m, mnt->mnt_devname); 1129 } else {
1076 } else 1130 if (mnt->mnt_devname) {
1077 seq_puts(m, "no device"); 1131 seq_puts(m, "device ");
1132 mangle(m, mnt->mnt_devname);
1133 } else
1134 seq_puts(m, "no device");
1135 }
1078 1136
1079 /* mount point */ 1137 /* mount point */
1080 seq_puts(m, " mounted on "); 1138 seq_puts(m, " mounted on ");
@@ -1088,7 +1146,8 @@ static int show_vfsstat(struct seq_file *m, void *v)
1088 /* optional statistics */ 1146 /* optional statistics */
1089 if (mnt->mnt_sb->s_op->show_stats) { 1147 if (mnt->mnt_sb->s_op->show_stats) {
1090 seq_putc(m, ' '); 1148 seq_putc(m, ' ');
1091 err = mnt->mnt_sb->s_op->show_stats(m, mnt); 1149 if (!err)
1150 err = mnt->mnt_sb->s_op->show_stats(m, mnt);
1092 } 1151 }
1093 1152
1094 seq_putc(m, '\n'); 1153 seq_putc(m, '\n');
@@ -1604,9 +1663,35 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
1604 return err; 1663 return err;
1605} 1664}
1606 1665
1666static int lock_mount(struct path *path)
1667{
1668 struct vfsmount *mnt;
1669retry:
1670 mutex_lock(&path->dentry->d_inode->i_mutex);
1671 if (unlikely(cant_mount(path->dentry))) {
1672 mutex_unlock(&path->dentry->d_inode->i_mutex);
1673 return -ENOENT;
1674 }
1675 down_write(&namespace_sem);
1676 mnt = lookup_mnt(path);
1677 if (likely(!mnt))
1678 return 0;
1679 up_write(&namespace_sem);
1680 mutex_unlock(&path->dentry->d_inode->i_mutex);
1681 path_put(path);
1682 path->mnt = mnt;
1683 path->dentry = dget(mnt->mnt_root);
1684 goto retry;
1685}
1686
1687static void unlock_mount(struct path *path)
1688{
1689 up_write(&namespace_sem);
1690 mutex_unlock(&path->dentry->d_inode->i_mutex);
1691}
1692
1607static int graft_tree(struct vfsmount *mnt, struct path *path) 1693static int graft_tree(struct vfsmount *mnt, struct path *path)
1608{ 1694{
1609 int err;
1610 if (mnt->mnt_sb->s_flags & MS_NOUSER) 1695 if (mnt->mnt_sb->s_flags & MS_NOUSER)
1611 return -EINVAL; 1696 return -EINVAL;
1612 1697
@@ -1614,16 +1699,10 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
1614 S_ISDIR(mnt->mnt_root->d_inode->i_mode)) 1699 S_ISDIR(mnt->mnt_root->d_inode->i_mode))
1615 return -ENOTDIR; 1700 return -ENOTDIR;
1616 1701
1617 err = -ENOENT; 1702 if (d_unlinked(path->dentry))
1618 mutex_lock(&path->dentry->d_inode->i_mutex); 1703 return -ENOENT;
1619 if (cant_mount(path->dentry))
1620 goto out_unlock;
1621 1704
1622 if (!d_unlinked(path->dentry)) 1705 return attach_recursive_mnt(mnt, path, NULL);
1623 err = attach_recursive_mnt(mnt, path, NULL);
1624out_unlock:
1625 mutex_unlock(&path->dentry->d_inode->i_mutex);
1626 return err;
1627} 1706}
1628 1707
1629/* 1708/*
@@ -1686,6 +1765,7 @@ static int do_change_type(struct path *path, int flag)
1686static int do_loopback(struct path *path, char *old_name, 1765static int do_loopback(struct path *path, char *old_name,
1687 int recurse) 1766 int recurse)
1688{ 1767{
1768 LIST_HEAD(umount_list);
1689 struct path old_path; 1769 struct path old_path;
1690 struct vfsmount *mnt = NULL; 1770 struct vfsmount *mnt = NULL;
1691 int err = mount_is_safe(path); 1771 int err = mount_is_safe(path);
@@ -1697,13 +1777,16 @@ static int do_loopback(struct path *path, char *old_name,
1697 if (err) 1777 if (err)
1698 return err; 1778 return err;
1699 1779
1700 down_write(&namespace_sem); 1780 err = lock_mount(path);
1781 if (err)
1782 goto out;
1783
1701 err = -EINVAL; 1784 err = -EINVAL;
1702 if (IS_MNT_UNBINDABLE(old_path.mnt)) 1785 if (IS_MNT_UNBINDABLE(old_path.mnt))
1703 goto out; 1786 goto out2;
1704 1787
1705 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1788 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1706 goto out; 1789 goto out2;
1707 1790
1708 err = -ENOMEM; 1791 err = -ENOMEM;
1709 if (recurse) 1792 if (recurse)
@@ -1712,20 +1795,18 @@ static int do_loopback(struct path *path, char *old_name,
1712 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); 1795 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
1713 1796
1714 if (!mnt) 1797 if (!mnt)
1715 goto out; 1798 goto out2;
1716 1799
1717 err = graft_tree(mnt, path); 1800 err = graft_tree(mnt, path);
1718 if (err) { 1801 if (err) {
1719 LIST_HEAD(umount_list);
1720
1721 br_write_lock(vfsmount_lock); 1802 br_write_lock(vfsmount_lock);
1722 umount_tree(mnt, 0, &umount_list); 1803 umount_tree(mnt, 0, &umount_list);
1723 br_write_unlock(vfsmount_lock); 1804 br_write_unlock(vfsmount_lock);
1724 release_mounts(&umount_list);
1725 } 1805 }
1726 1806out2:
1807 unlock_mount(path);
1808 release_mounts(&umount_list);
1727out: 1809out:
1728 up_write(&namespace_sem);
1729 path_put(&old_path); 1810 path_put(&old_path);
1730 return err; 1811 return err;
1731} 1812}
@@ -1767,6 +1848,10 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
1767 if (path->dentry != path->mnt->mnt_root) 1848 if (path->dentry != path->mnt->mnt_root)
1768 return -EINVAL; 1849 return -EINVAL;
1769 1850
1851 err = security_sb_remount(sb, data);
1852 if (err)
1853 return err;
1854
1770 down_write(&sb->s_umount); 1855 down_write(&sb->s_umount);
1771 if (flags & MS_BIND) 1856 if (flags & MS_BIND)
1772 err = change_mount_flags(path->mnt, flags); 1857 err = change_mount_flags(path->mnt, flags);
@@ -1810,18 +1895,12 @@ static int do_move_mount(struct path *path, char *old_name)
1810 if (err) 1895 if (err)
1811 return err; 1896 return err;
1812 1897
1813 down_write(&namespace_sem); 1898 err = lock_mount(path);
1814 err = follow_down(path, true);
1815 if (err < 0) 1899 if (err < 0)
1816 goto out; 1900 goto out;
1817 1901
1818 err = -EINVAL; 1902 err = -EINVAL;
1819 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) 1903 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1820 goto out;
1821
1822 err = -ENOENT;
1823 mutex_lock(&path->dentry->d_inode->i_mutex);
1824 if (cant_mount(path->dentry))
1825 goto out1; 1904 goto out1;
1826 1905
1827 if (d_unlinked(path->dentry)) 1906 if (d_unlinked(path->dentry))
@@ -1863,16 +1942,87 @@ static int do_move_mount(struct path *path, char *old_name)
1863 * automatically */ 1942 * automatically */
1864 list_del_init(&old_path.mnt->mnt_expire); 1943 list_del_init(&old_path.mnt->mnt_expire);
1865out1: 1944out1:
1866 mutex_unlock(&path->dentry->d_inode->i_mutex); 1945 unlock_mount(path);
1867out: 1946out:
1868 up_write(&namespace_sem);
1869 if (!err) 1947 if (!err)
1870 path_put(&parent_path); 1948 path_put(&parent_path);
1871 path_put(&old_path); 1949 path_put(&old_path);
1872 return err; 1950 return err;
1873} 1951}
1874 1952
1875static int do_add_mount(struct vfsmount *, struct path *, int); 1953static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1954{
1955 int err;
1956 const char *subtype = strchr(fstype, '.');
1957 if (subtype) {
1958 subtype++;
1959 err = -EINVAL;
1960 if (!subtype[0])
1961 goto err;
1962 } else
1963 subtype = "";
1964
1965 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
1966 err = -ENOMEM;
1967 if (!mnt->mnt_sb->s_subtype)
1968 goto err;
1969 return mnt;
1970
1971 err:
1972 mntput(mnt);
1973 return ERR_PTR(err);
1974}
1975
1976struct vfsmount *
1977do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1978{
1979 struct file_system_type *type = get_fs_type(fstype);
1980 struct vfsmount *mnt;
1981 if (!type)
1982 return ERR_PTR(-ENODEV);
1983 mnt = vfs_kern_mount(type, flags, name, data);
1984 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1985 !mnt->mnt_sb->s_subtype)
1986 mnt = fs_set_subtype(mnt, fstype);
1987 put_filesystem(type);
1988 return mnt;
1989}
1990EXPORT_SYMBOL_GPL(do_kern_mount);
1991
1992/*
1993 * add a mount into a namespace's mount tree
1994 */
1995static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
1996{
1997 int err;
1998
1999 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
2000
2001 err = lock_mount(path);
2002 if (err)
2003 return err;
2004
2005 err = -EINVAL;
2006 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
2007 goto unlock;
2008
2009 /* Refuse the same filesystem on the same mount point */
2010 err = -EBUSY;
2011 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
2012 path->mnt->mnt_root == path->dentry)
2013 goto unlock;
2014
2015 err = -EINVAL;
2016 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
2017 goto unlock;
2018
2019 newmnt->mnt_flags = mnt_flags;
2020 err = graft_tree(newmnt, path);
2021
2022unlock:
2023 unlock_mount(path);
2024 return err;
2025}
1876 2026
1877/* 2027/*
1878 * create a new mount for userspace and request it to be added into the 2028 * create a new mount for userspace and request it to be added into the
@@ -1932,43 +2082,6 @@ fail:
1932 return err; 2082 return err;
1933} 2083}
1934 2084
1935/*
1936 * add a mount into a namespace's mount tree
1937 */
1938static int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
1939{
1940 int err;
1941
1942 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
1943
1944 down_write(&namespace_sem);
1945 /* Something was mounted here while we slept */
1946 err = follow_down(path, true);
1947 if (err < 0)
1948 goto unlock;
1949
1950 err = -EINVAL;
1951 if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
1952 goto unlock;
1953
1954 /* Refuse the same filesystem on the same mount point */
1955 err = -EBUSY;
1956 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
1957 path->mnt->mnt_root == path->dentry)
1958 goto unlock;
1959
1960 err = -EINVAL;
1961 if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode))
1962 goto unlock;
1963
1964 newmnt->mnt_flags = mnt_flags;
1965 err = graft_tree(newmnt, path);
1966
1967unlock:
1968 up_write(&namespace_sem);
1969 return err;
1970}
1971
1972/** 2085/**
1973 * mnt_set_expiry - Put a mount on an expiration list 2086 * mnt_set_expiry - Put a mount on an expiration list
1974 * @mnt: The mount to list. 2087 * @mnt: The mount to list.
@@ -2469,65 +2582,60 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2469 error = user_path_dir(new_root, &new); 2582 error = user_path_dir(new_root, &new);
2470 if (error) 2583 if (error)
2471 goto out0; 2584 goto out0;
2472 error = -EINVAL;
2473 if (!check_mnt(new.mnt))
2474 goto out1;
2475 2585
2476 error = user_path_dir(put_old, &old); 2586 error = user_path_dir(put_old, &old);
2477 if (error) 2587 if (error)
2478 goto out1; 2588 goto out1;
2479 2589
2480 error = security_sb_pivotroot(&old, &new); 2590 error = security_sb_pivotroot(&old, &new);
2481 if (error) { 2591 if (error)
2482 path_put(&old); 2592 goto out2;
2483 goto out1;
2484 }
2485 2593
2486 get_fs_root(current->fs, &root); 2594 get_fs_root(current->fs, &root);
2487 down_write(&namespace_sem); 2595 error = lock_mount(&old);
2488 mutex_lock(&old.dentry->d_inode->i_mutex); 2596 if (error)
2597 goto out3;
2598
2489 error = -EINVAL; 2599 error = -EINVAL;
2490 if (IS_MNT_SHARED(old.mnt) || 2600 if (IS_MNT_SHARED(old.mnt) ||
2491 IS_MNT_SHARED(new.mnt->mnt_parent) || 2601 IS_MNT_SHARED(new.mnt->mnt_parent) ||
2492 IS_MNT_SHARED(root.mnt->mnt_parent)) 2602 IS_MNT_SHARED(root.mnt->mnt_parent))
2493 goto out2; 2603 goto out4;
2494 if (!check_mnt(root.mnt)) 2604 if (!check_mnt(root.mnt) || !check_mnt(new.mnt))
2495 goto out2; 2605 goto out4;
2496 error = -ENOENT; 2606 error = -ENOENT;
2497 if (cant_mount(old.dentry))
2498 goto out2;
2499 if (d_unlinked(new.dentry)) 2607 if (d_unlinked(new.dentry))
2500 goto out2; 2608 goto out4;
2501 if (d_unlinked(old.dentry)) 2609 if (d_unlinked(old.dentry))
2502 goto out2; 2610 goto out4;
2503 error = -EBUSY; 2611 error = -EBUSY;
2504 if (new.mnt == root.mnt || 2612 if (new.mnt == root.mnt ||
2505 old.mnt == root.mnt) 2613 old.mnt == root.mnt)
2506 goto out2; /* loop, on the same file system */ 2614 goto out4; /* loop, on the same file system */
2507 error = -EINVAL; 2615 error = -EINVAL;
2508 if (root.mnt->mnt_root != root.dentry) 2616 if (root.mnt->mnt_root != root.dentry)
2509 goto out2; /* not a mountpoint */ 2617 goto out4; /* not a mountpoint */
2510 if (root.mnt->mnt_parent == root.mnt) 2618 if (root.mnt->mnt_parent == root.mnt)
2511 goto out2; /* not attached */ 2619 goto out4; /* not attached */
2512 if (new.mnt->mnt_root != new.dentry) 2620 if (new.mnt->mnt_root != new.dentry)
2513 goto out2; /* not a mountpoint */ 2621 goto out4; /* not a mountpoint */
2514 if (new.mnt->mnt_parent == new.mnt) 2622 if (new.mnt->mnt_parent == new.mnt)
2515 goto out2; /* not attached */ 2623 goto out4; /* not attached */
2516 /* make sure we can reach put_old from new_root */ 2624 /* make sure we can reach put_old from new_root */
2517 tmp = old.mnt; 2625 tmp = old.mnt;
2518 br_write_lock(vfsmount_lock);
2519 if (tmp != new.mnt) { 2626 if (tmp != new.mnt) {
2520 for (;;) { 2627 for (;;) {
2521 if (tmp->mnt_parent == tmp) 2628 if (tmp->mnt_parent == tmp)
2522 goto out3; /* already mounted on put_old */ 2629 goto out4; /* already mounted on put_old */
2523 if (tmp->mnt_parent == new.mnt) 2630 if (tmp->mnt_parent == new.mnt)
2524 break; 2631 break;
2525 tmp = tmp->mnt_parent; 2632 tmp = tmp->mnt_parent;
2526 } 2633 }
2527 if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) 2634 if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
2528 goto out3; 2635 goto out4;
2529 } else if (!is_subdir(old.dentry, new.dentry)) 2636 } else if (!is_subdir(old.dentry, new.dentry))
2530 goto out3; 2637 goto out4;
2638 br_write_lock(vfsmount_lock);
2531 detach_mnt(new.mnt, &parent_path); 2639 detach_mnt(new.mnt, &parent_path);
2532 detach_mnt(root.mnt, &root_parent); 2640 detach_mnt(root.mnt, &root_parent);
2533 /* mount old root on put_old */ 2641 /* mount old root on put_old */
@@ -2537,22 +2645,21 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2537 touch_mnt_namespace(current->nsproxy->mnt_ns); 2645 touch_mnt_namespace(current->nsproxy->mnt_ns);
2538 br_write_unlock(vfsmount_lock); 2646 br_write_unlock(vfsmount_lock);
2539 chroot_fs_refs(&root, &new); 2647 chroot_fs_refs(&root, &new);
2540
2541 error = 0; 2648 error = 0;
2542 path_put(&root_parent); 2649out4:
2543 path_put(&parent_path); 2650 unlock_mount(&old);
2544out2: 2651 if (!error) {
2545 mutex_unlock(&old.dentry->d_inode->i_mutex); 2652 path_put(&root_parent);
2546 up_write(&namespace_sem); 2653 path_put(&parent_path);
2654 }
2655out3:
2547 path_put(&root); 2656 path_put(&root);
2657out2:
2548 path_put(&old); 2658 path_put(&old);
2549out1: 2659out1:
2550 path_put(&new); 2660 path_put(&new);
2551out0: 2661out0:
2552 return error; 2662 return error;
2553out3:
2554 br_write_unlock(vfsmount_lock);
2555 goto out2;
2556} 2663}
2557 2664
2558static void __init init_mount_tree(void) 2665static void __init init_mount_tree(void)
@@ -2594,7 +2701,7 @@ void __init mnt_init(void)
2594 if (!mount_hashtable) 2701 if (!mount_hashtable)
2595 panic("Failed to allocate mount hash table\n"); 2702 panic("Failed to allocate mount hash table\n");
2596 2703
2597 printk("Mount-cache hash table entries: %lu\n", HASH_SIZE); 2704 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
2598 2705
2599 for (u = 0; u < HASH_SIZE; u++) 2706 for (u = 0; u < HASH_SIZE; u++)
2600 INIT_LIST_HEAD(&mount_hashtable[u]); 2707 INIT_LIST_HEAD(&mount_hashtable[u]);
@@ -2627,3 +2734,9 @@ void put_mnt_ns(struct mnt_namespace *ns)
2627 kfree(ns); 2734 kfree(ns);
2628} 2735}
2629EXPORT_SYMBOL(put_mnt_ns); 2736EXPORT_SYMBOL(put_mnt_ns);
2737
2738struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
2739{
2740 return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
2741}
2742EXPORT_SYMBOL_GPL(kern_mount_data);
diff --git a/fs/ncpfs/Makefile b/fs/ncpfs/Makefile
index 68ea095100a8..c66af563f2ce 100644
--- a/fs/ncpfs/Makefile
+++ b/fs/ncpfs/Makefile
@@ -11,6 +11,6 @@ ncpfs-$(CONFIG_NCPFS_EXTRAS) += symlink.o
11ncpfs-$(CONFIG_NCPFS_NFS_NS) += symlink.o 11ncpfs-$(CONFIG_NCPFS_NFS_NS) += symlink.o
12 12
13# If you want debugging output, please uncomment the following line 13# If you want debugging output, please uncomment the following line
14# EXTRA_CFLAGS += -DDEBUG_NCP=1 14# ccflags-y := -DDEBUG_NCP=1
15 15
16CFLAGS_ncplib_kernel.o := -finline-functions 16CFLAGS_ncplib_kernel.o := -finline-functions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 89587573fe50..2f41dccea18e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
188 rv = NFS4ERR_DELAY; 188 rv = NFS4ERR_DELAY;
189 list_del_init(&lo->plh_bulk_recall); 189 list_del_init(&lo->plh_bulk_recall);
190 spin_unlock(&ino->i_lock); 190 spin_unlock(&ino->i_lock);
191 pnfs_free_lseg_list(&free_me_list);
191 put_layout_hdr(lo); 192 put_layout_hdr(lo);
192 iput(ino); 193 iput(ino);
193 } 194 }
194 pnfs_free_lseg_list(&free_me_list);
195 return rv; 195 return rv;
196} 196}
197 197
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index bd3ca32879e7..139be9647d80 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -82,6 +82,11 @@ retry:
82#endif /* CONFIG_NFS_V4 */ 82#endif /* CONFIG_NFS_V4 */
83 83
84/* 84/*
85 * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
86 */
87static int nfs4_disable_idmapping = 0;
88
89/*
85 * RPC cruft for NFS 90 * RPC cruft for NFS
86 */ 91 */
87static struct rpc_version *nfs_version[5] = { 92static struct rpc_version *nfs_version[5] = {
@@ -481,7 +486,12 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
481 * Look up a client by IP address and protocol version 486 * Look up a client by IP address and protocol version
482 * - creates a new record if one doesn't yet exist 487 * - creates a new record if one doesn't yet exist
483 */ 488 */
484static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) 489static struct nfs_client *
490nfs_get_client(const struct nfs_client_initdata *cl_init,
491 const struct rpc_timeout *timeparms,
492 const char *ip_addr,
493 rpc_authflavor_t authflavour,
494 int noresvport)
485{ 495{
486 struct nfs_client *clp, *new = NULL; 496 struct nfs_client *clp, *new = NULL;
487 int error; 497 int error;
@@ -512,6 +522,13 @@ install_client:
512 clp = new; 522 clp = new;
513 list_add(&clp->cl_share_link, &nfs_client_list); 523 list_add(&clp->cl_share_link, &nfs_client_list);
514 spin_unlock(&nfs_client_lock); 524 spin_unlock(&nfs_client_lock);
525
526 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
527 authflavour, noresvport);
528 if (error < 0) {
529 nfs_put_client(clp);
530 return ERR_PTR(error);
531 }
515 dprintk("--> nfs_get_client() = %p [new]\n", clp); 532 dprintk("--> nfs_get_client() = %p [new]\n", clp);
516 return clp; 533 return clp;
517 534
@@ -767,9 +784,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
767/* 784/*
768 * Initialise an NFS2 or NFS3 client 785 * Initialise an NFS2 or NFS3 client
769 */ 786 */
770static int nfs_init_client(struct nfs_client *clp, 787int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
771 const struct rpc_timeout *timeparms, 788 const char *ip_addr, rpc_authflavor_t authflavour,
772 const struct nfs_parsed_mount_data *data) 789 int noresvport)
773{ 790{
774 int error; 791 int error;
775 792
@@ -784,7 +801,7 @@ static int nfs_init_client(struct nfs_client *clp,
784 * - RFC 2623, sec 2.3.2 801 * - RFC 2623, sec 2.3.2
785 */ 802 */
786 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 803 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
787 0, data->flags & NFS_MOUNT_NORESVPORT); 804 0, noresvport);
788 if (error < 0) 805 if (error < 0)
789 goto error; 806 goto error;
790 nfs_mark_client_ready(clp, NFS_CS_READY); 807 nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -820,19 +837,17 @@ static int nfs_init_server(struct nfs_server *server,
820 cl_init.rpc_ops = &nfs_v3_clientops; 837 cl_init.rpc_ops = &nfs_v3_clientops;
821#endif 838#endif
822 839
840 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
841 data->timeo, data->retrans);
842
823 /* Allocate or find a client reference we can use */ 843 /* Allocate or find a client reference we can use */
824 clp = nfs_get_client(&cl_init); 844 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
845 data->flags & NFS_MOUNT_NORESVPORT);
825 if (IS_ERR(clp)) { 846 if (IS_ERR(clp)) {
826 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 847 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
827 return PTR_ERR(clp); 848 return PTR_ERR(clp);
828 } 849 }
829 850
830 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
831 data->timeo, data->retrans);
832 error = nfs_init_client(clp, &timeparms, data);
833 if (error < 0)
834 goto error;
835
836 server->nfs_client = clp; 851 server->nfs_client = clp;
837 852
838 /* Initialise the client representation from the mount data */ 853 /* Initialise the client representation from the mount data */
@@ -1009,14 +1024,19 @@ static void nfs_server_insert_lists(struct nfs_server *server)
1009 spin_lock(&nfs_client_lock); 1024 spin_lock(&nfs_client_lock);
1010 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1025 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1011 list_add_tail(&server->master_link, &nfs_volume_list); 1026 list_add_tail(&server->master_link, &nfs_volume_list);
1027 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1012 spin_unlock(&nfs_client_lock); 1028 spin_unlock(&nfs_client_lock);
1013 1029
1014} 1030}
1015 1031
1016static void nfs_server_remove_lists(struct nfs_server *server) 1032static void nfs_server_remove_lists(struct nfs_server *server)
1017{ 1033{
1034 struct nfs_client *clp = server->nfs_client;
1035
1018 spin_lock(&nfs_client_lock); 1036 spin_lock(&nfs_client_lock);
1019 list_del_rcu(&server->client_link); 1037 list_del_rcu(&server->client_link);
1038 if (clp && list_empty(&clp->cl_superblocks))
1039 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1020 list_del(&server->master_link); 1040 list_del(&server->master_link);
1021 spin_unlock(&nfs_client_lock); 1041 spin_unlock(&nfs_client_lock);
1022 1042
@@ -1307,11 +1327,11 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1307/* 1327/*
1308 * Initialise an NFS4 client record 1328 * Initialise an NFS4 client record
1309 */ 1329 */
1310static int nfs4_init_client(struct nfs_client *clp, 1330int nfs4_init_client(struct nfs_client *clp,
1311 const struct rpc_timeout *timeparms, 1331 const struct rpc_timeout *timeparms,
1312 const char *ip_addr, 1332 const char *ip_addr,
1313 rpc_authflavor_t authflavour, 1333 rpc_authflavor_t authflavour,
1314 int flags) 1334 int noresvport)
1315{ 1335{
1316 int error; 1336 int error;
1317 1337
@@ -1325,7 +1345,7 @@ static int nfs4_init_client(struct nfs_client *clp,
1325 clp->rpc_ops = &nfs_v4_clientops; 1345 clp->rpc_ops = &nfs_v4_clientops;
1326 1346
1327 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1347 error = nfs_create_rpc_client(clp, timeparms, authflavour,
1328 1, flags & NFS_MOUNT_NORESVPORT); 1348 1, noresvport);
1329 if (error < 0) 1349 if (error < 0)
1330 goto error; 1350 goto error;
1331 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1351 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1378,27 +1398,71 @@ static int nfs4_set_client(struct nfs_server *server,
1378 dprintk("--> nfs4_set_client()\n"); 1398 dprintk("--> nfs4_set_client()\n");
1379 1399
1380 /* Allocate or find a client reference we can use */ 1400 /* Allocate or find a client reference we can use */
1381 clp = nfs_get_client(&cl_init); 1401 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
1402 server->flags & NFS_MOUNT_NORESVPORT);
1382 if (IS_ERR(clp)) { 1403 if (IS_ERR(clp)) {
1383 error = PTR_ERR(clp); 1404 error = PTR_ERR(clp);
1384 goto error; 1405 goto error;
1385 } 1406 }
1386 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour, 1407
1387 server->flags); 1408 /*
1388 if (error < 0) 1409 * Query for the lease time on clientid setup or renewal
1389 goto error_put; 1410 *
1411 * Note that this will be set on nfs_clients that were created
1412 * only for the DS role and did not set this bit, but now will
1413 * serve a dual role.
1414 */
1415 set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
1390 1416
1391 server->nfs_client = clp; 1417 server->nfs_client = clp;
1392 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); 1418 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
1393 return 0; 1419 return 0;
1394
1395error_put:
1396 nfs_put_client(clp);
1397error: 1420error:
1398 dprintk("<-- nfs4_set_client() = xerror %d\n", error); 1421 dprintk("<-- nfs4_set_client() = xerror %d\n", error);
1399 return error; 1422 return error;
1400} 1423}
1401 1424
1425/*
1426 * Set up a pNFS Data Server client.
1427 *
1428 * Return any existing nfs_client that matches server address,port,version
1429 * and minorversion.
1430 *
1431 * For a new nfs_client, use a soft mount (default), a low retrans and a
1432 * low timeout interval so that if a connection is lost, we retry through
1433 * the MDS.
1434 */
1435struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1436 const struct sockaddr *ds_addr,
1437 int ds_addrlen, int ds_proto)
1438{
1439 struct nfs_client_initdata cl_init = {
1440 .addr = ds_addr,
1441 .addrlen = ds_addrlen,
1442 .rpc_ops = &nfs_v4_clientops,
1443 .proto = ds_proto,
1444 .minorversion = mds_clp->cl_minorversion,
1445 };
1446 struct rpc_timeout ds_timeout = {
1447 .to_initval = 15 * HZ,
1448 .to_maxval = 15 * HZ,
1449 .to_retries = 1,
1450 .to_exponential = 1,
1451 };
1452 struct nfs_client *clp;
1453
1454 /*
1455 * Set an authflavor equal to the MDS value. Use the MDS nfs_client
1456 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1457 * (section 13.1 RFC 5661).
1458 */
1459 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1460 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
1461
1462 dprintk("<-- %s %p\n", __func__, clp);
1463 return clp;
1464}
1465EXPORT_SYMBOL(nfs4_set_ds_client);
1402 1466
1403/* 1467/*
1404 * Session has been established, and the client marked ready. 1468 * Session has been established, and the client marked ready.
@@ -1435,6 +1499,10 @@ static int nfs4_server_common_setup(struct nfs_server *server,
1435 BUG_ON(!server->nfs_client->rpc_ops); 1499 BUG_ON(!server->nfs_client->rpc_ops);
1436 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1500 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1437 1501
1502 /* data servers support only a subset of NFSv4.1 */
1503 if (is_ds_only_client(server->nfs_client))
1504 return -EPROTONOSUPPORT;
1505
1438 fattr = nfs_alloc_fattr(); 1506 fattr = nfs_alloc_fattr();
1439 if (fattr == NULL) 1507 if (fattr == NULL)
1440 return -ENOMEM; 1508 return -ENOMEM;
@@ -1504,6 +1572,13 @@ static int nfs4_init_server(struct nfs_server *server,
1504 if (error < 0) 1572 if (error < 0)
1505 goto error; 1573 goto error;
1506 1574
1575 /*
1576 * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
1577 * authentication.
1578 */
1579 if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
1580 server->caps |= NFS_CAP_UIDGID_NOMAP;
1581
1507 if (data->rsize) 1582 if (data->rsize)
1508 server->rsize = nfs_block_size(data->rsize, NULL); 1583 server->rsize = nfs_block_size(data->rsize, NULL);
1509 if (data->wsize) 1584 if (data->wsize)
@@ -1921,3 +1996,7 @@ void nfs_fs_proc_exit(void)
1921} 1996}
1922 1997
1923#endif /* CONFIG_PROC_FS */ 1998#endif /* CONFIG_PROC_FS */
1999
2000module_param(nfs4_disable_idmapping, bool, 0644);
2001MODULE_PARM_DESC(nfs4_disable_idmapping,
2002 "Turn off NFSv4 idmapping when using 'sec=sys'");
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c3eb33b904d..abdf38d5971d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1169,11 +1169,23 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1169 iput(inode); 1169 iput(inode);
1170} 1170}
1171 1171
1172static void nfs_d_release(struct dentry *dentry)
1173{
1174 /* free cached devname value, if it survived that far */
1175 if (unlikely(dentry->d_fsdata)) {
1176 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1177 WARN_ON(1);
1178 else
1179 kfree(dentry->d_fsdata);
1180 }
1181}
1182
1172const struct dentry_operations nfs_dentry_operations = { 1183const struct dentry_operations nfs_dentry_operations = {
1173 .d_revalidate = nfs_lookup_revalidate, 1184 .d_revalidate = nfs_lookup_revalidate,
1174 .d_delete = nfs_dentry_delete, 1185 .d_delete = nfs_dentry_delete,
1175 .d_iput = nfs_dentry_iput, 1186 .d_iput = nfs_dentry_iput,
1176 .d_automount = nfs_d_automount, 1187 .d_automount = nfs_d_automount,
1188 .d_release = nfs_d_release,
1177}; 1189};
1178 1190
1179static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1191static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1248,6 +1260,7 @@ const struct dentry_operations nfs4_dentry_operations = {
1248 .d_delete = nfs_dentry_delete, 1260 .d_delete = nfs_dentry_delete,
1249 .d_iput = nfs_dentry_iput, 1261 .d_iput = nfs_dentry_iput,
1250 .d_automount = nfs_d_automount, 1262 .d_automount = nfs_d_automount,
1263 .d_release = nfs_d_release,
1251}; 1264};
1252 1265
1253/* 1266/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9943a75bb6d1..8eea25366717 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -45,6 +45,7 @@
45#include <linux/pagemap.h> 45#include <linux/pagemap.h>
46#include <linux/kref.h> 46#include <linux/kref.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/task_io_accounting_ops.h>
48 49
49#include <linux/nfs_fs.h> 50#include <linux/nfs_fs.h>
50#include <linux/nfs_page.h> 51#include <linux/nfs_page.h>
@@ -649,8 +650,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
649{ 650{
650 struct nfs_write_data *data = calldata; 651 struct nfs_write_data *data = calldata;
651 652
652 if (nfs_writeback_done(task, data) != 0) 653 nfs_writeback_done(task, data);
653 return;
654} 654}
655 655
656/* 656/*
@@ -938,6 +938,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
938 if (retval) 938 if (retval)
939 goto out; 939 goto out;
940 940
941 task_io_account_read(count);
942
941 retval = nfs_direct_read(iocb, iov, nr_segs, pos); 943 retval = nfs_direct_read(iocb, iov, nr_segs, pos);
942 if (retval > 0) 944 if (retval > 0)
943 iocb->ki_pos = pos + retval; 945 iocb->ki_pos = pos + retval;
@@ -999,6 +1001,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
999 if (retval) 1001 if (retval)
1000 goto out; 1002 goto out;
1001 1003
1004 task_io_account_write(count);
1005
1002 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 1006 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
1003 1007
1004 if (retval > 0) 1008 if (retval > 0)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7bf029ef4084..d85a534b15cd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -387,10 +387,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
387 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
388 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
389 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
394start: 390start:
395 /* 391 /*
396 * Prevent starvation issues if someone is doing a consistency 392 * Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b5ffe8fa291f..1084792bc0fe 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -75,18 +75,25 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
75/* 75/*
76 * get an NFS2/NFS3 root dentry from the root filehandle 76 * get an NFS2/NFS3 root dentry from the root filehandle
77 */ 77 */
78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) 78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
79 const char *devname)
79{ 80{
80 struct nfs_server *server = NFS_SB(sb); 81 struct nfs_server *server = NFS_SB(sb);
81 struct nfs_fsinfo fsinfo; 82 struct nfs_fsinfo fsinfo;
82 struct dentry *ret; 83 struct dentry *ret;
83 struct inode *inode; 84 struct inode *inode;
85 void *name = kstrdup(devname, GFP_KERNEL);
84 int error; 86 int error;
85 87
88 if (!name)
89 return ERR_PTR(-ENOMEM);
90
86 /* get the actual root for this mount */ 91 /* get the actual root for this mount */
87 fsinfo.fattr = nfs_alloc_fattr(); 92 fsinfo.fattr = nfs_alloc_fattr();
88 if (fsinfo.fattr == NULL) 93 if (fsinfo.fattr == NULL) {
94 kfree(name);
89 return ERR_PTR(-ENOMEM); 95 return ERR_PTR(-ENOMEM);
96 }
90 97
91 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 98 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
92 if (error < 0) { 99 if (error < 0) {
@@ -119,7 +126,15 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
119 } 126 }
120 127
121 security_d_instantiate(ret, inode); 128 security_d_instantiate(ret, inode);
129 spin_lock(&ret->d_lock);
130 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
131 ret->d_fsdata = name;
132 name = NULL;
133 }
134 spin_unlock(&ret->d_lock);
122out: 135out:
136 if (name)
137 kfree(name);
123 nfs_free_fattr(fsinfo.fattr); 138 nfs_free_fattr(fsinfo.fattr);
124 return ret; 139 return ret;
125} 140}
@@ -169,27 +184,35 @@ out:
169/* 184/*
170 * get an NFS4 root dentry from the root filehandle 185 * get an NFS4 root dentry from the root filehandle
171 */ 186 */
172struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) 187struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
188 const char *devname)
173{ 189{
174 struct nfs_server *server = NFS_SB(sb); 190 struct nfs_server *server = NFS_SB(sb);
175 struct nfs_fattr *fattr = NULL; 191 struct nfs_fattr *fattr = NULL;
176 struct dentry *ret; 192 struct dentry *ret;
177 struct inode *inode; 193 struct inode *inode;
194 void *name = kstrdup(devname, GFP_KERNEL);
178 int error; 195 int error;
179 196
180 dprintk("--> nfs4_get_root()\n"); 197 dprintk("--> nfs4_get_root()\n");
181 198
199 if (!name)
200 return ERR_PTR(-ENOMEM);
201
182 /* get the info about the server and filesystem */ 202 /* get the info about the server and filesystem */
183 error = nfs4_server_capabilities(server, mntfh); 203 error = nfs4_server_capabilities(server, mntfh);
184 if (error < 0) { 204 if (error < 0) {
185 dprintk("nfs_get_root: getcaps error = %d\n", 205 dprintk("nfs_get_root: getcaps error = %d\n",
186 -error); 206 -error);
207 kfree(name);
187 return ERR_PTR(error); 208 return ERR_PTR(error);
188 } 209 }
189 210
190 fattr = nfs_alloc_fattr(); 211 fattr = nfs_alloc_fattr();
191 if (fattr == NULL) 212 if (fattr == NULL) {
192 return ERR_PTR(-ENOMEM);; 213 kfree(name);
214 return ERR_PTR(-ENOMEM);
215 }
193 216
194 /* get the actual root for this mount */ 217 /* get the actual root for this mount */
195 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); 218 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
@@ -223,8 +246,15 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
223 } 246 }
224 247
225 security_d_instantiate(ret, inode); 248 security_d_instantiate(ret, inode);
226 249 spin_lock(&ret->d_lock);
250 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
251 ret->d_fsdata = name;
252 name = NULL;
253 }
254 spin_unlock(&ret->d_lock);
227out: 255out:
256 if (name)
257 kfree(name);
228 nfs_free_fattr(fattr); 258 nfs_free_fattr(fattr);
229 dprintk("<-- nfs4_get_root()\n"); 259 dprintk("<-- nfs4_get_root()\n");
230 return ret; 260 return ret;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 18696882f1c6..79664a1025af 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -33,16 +33,41 @@
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36#include <linux/types.h>
37#include <linux/string.h>
38#include <linux/kernel.h>
39
40static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
41{
42 unsigned long val;
43 char buf[16];
44
45 if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
46 return 0;
47 memcpy(buf, name, namelen);
48 buf[namelen] = '\0';
49 if (strict_strtoul(buf, 0, &val) != 0)
50 return 0;
51 *res = val;
52 return 1;
53}
54
55static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
56{
57 return snprintf(buf, buflen, "%u", id);
58}
36 59
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER 60#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38 61
39#include <linux/slab.h> 62#include <linux/slab.h>
40#include <linux/cred.h> 63#include <linux/cred.h>
64#include <linux/sunrpc/sched.h>
65#include <linux/nfs4.h>
66#include <linux/nfs_fs_sb.h>
41#include <linux/nfs_idmap.h> 67#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h> 68#include <linux/keyctl.h>
43#include <linux/key-type.h> 69#include <linux/key-type.h>
44#include <linux/rcupdate.h> 70#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h> 71#include <linux/err.h>
47 72
48#include <keys/user-type.h> 73#include <keys/user-type.h>
@@ -219,23 +244,39 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
219 return ret; 244 return ret;
220} 245}
221 246
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 247int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
223{ 248{
249 if (nfs_map_string_to_numeric(name, namelen, uid))
250 return 0;
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid); 251 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225} 252}
226 253
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) 254int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
228{ 255{
256 if (nfs_map_string_to_numeric(name, namelen, gid))
257 return 0;
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid); 258 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230} 259}
231 260
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 261int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
233{ 262{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen); 263 int ret = -EINVAL;
264
265 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
266 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
267 if (ret < 0)
268 ret = nfs_map_numeric_to_string(uid, buf, buflen);
269 return ret;
235} 270}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) 271int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
237{ 272{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen); 273 int ret = -EINVAL;
274
275 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
276 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
277 if (ret < 0)
278 ret = nfs_map_numeric_to_string(gid, buf, buflen);
279 return ret;
239} 280}
240 281
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ 282#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
@@ -243,7 +284,6 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
243#include <linux/module.h> 284#include <linux/module.h>
244#include <linux/mutex.h> 285#include <linux/mutex.h>
245#include <linux/init.h> 286#include <linux/init.h>
246#include <linux/types.h>
247#include <linux/slab.h> 287#include <linux/slab.h>
248#include <linux/socket.h> 288#include <linux/socket.h>
249#include <linux/in.h> 289#include <linux/in.h>
@@ -695,31 +735,45 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
695 return hash; 735 return hash;
696} 736}
697 737
698int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 738int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
699{ 739{
700 struct idmap *idmap = clp->cl_idmap; 740 struct idmap *idmap = server->nfs_client->cl_idmap;
701 741
742 if (nfs_map_string_to_numeric(name, namelen, uid))
743 return 0;
702 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 744 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
703} 745}
704 746
705int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 747int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
706{ 748{
707 struct idmap *idmap = clp->cl_idmap; 749 struct idmap *idmap = server->nfs_client->cl_idmap;
708 750
751 if (nfs_map_string_to_numeric(name, namelen, uid))
752 return 0;
709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 753 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
710} 754}
711 755
712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 756int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
713{ 757{
714 struct idmap *idmap = clp->cl_idmap; 758 struct idmap *idmap = server->nfs_client->cl_idmap;
759 int ret = -EINVAL;
715 760
716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 761 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
762 ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
763 if (ret < 0)
764 ret = nfs_map_numeric_to_string(uid, buf, buflen);
765 return ret;
717} 766}
718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 767int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
719{ 768{
720 struct idmap *idmap = clp->cl_idmap; 769 struct idmap *idmap = server->nfs_client->cl_idmap;
770 int ret = -EINVAL;
721 771
722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 772 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
773 ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
774 if (ret < 0)
775 ret = nfs_map_numeric_to_string(uid, buf, buflen);
776 return ret;
723} 777}
724 778
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ 779#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..01768e5e2c9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
@@ -1513,7 +1518,7 @@ static int nfsiod_start(void)
1513{ 1518{
1514 struct workqueue_struct *wq; 1519 struct workqueue_struct *wq;
1515 dprintk("RPC: creating workqueue nfsiod\n"); 1520 dprintk("RPC: creating workqueue nfsiod\n");
1516 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); 1521 wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
1517 if (wq == NULL) 1522 if (wq == NULL)
1518 return -ENOMEM; 1523 return -ENOMEM;
1519 nfsiod_workqueue = wq; 1524 nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index cf9fdbdabc67..72e0bddf7a2f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -148,6 +148,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
148 struct nfs_fattr *); 148 struct nfs_fattr *);
149extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 149extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
150extern int nfs4_check_client_ready(struct nfs_client *clp); 150extern int nfs4_check_client_ready(struct nfs_client *clp);
151extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
152 const struct sockaddr *ds_addr,
153 int ds_addrlen, int ds_proto);
151#ifdef CONFIG_PROC_FS 154#ifdef CONFIG_PROC_FS
152extern int __init nfs_fs_proc_init(void); 155extern int __init nfs_fs_proc_init(void);
153extern void nfs_fs_proc_exit(void); 156extern void nfs_fs_proc_exit(void);
@@ -163,10 +166,10 @@ static inline void nfs_fs_proc_exit(void)
163 166
164/* nfs4namespace.c */ 167/* nfs4namespace.c */
165#ifdef CONFIG_NFS_V4 168#ifdef CONFIG_NFS_V4
166extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); 169extern struct vfsmount *nfs_do_refmount(struct dentry *dentry);
167#else 170#else
168static inline 171static inline
169struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 172struct vfsmount *nfs_do_refmount(struct dentry *dentry)
170{ 173{
171 return ERR_PTR(-ENOENT); 174 return ERR_PTR(-ENOENT);
172} 175}
@@ -213,8 +216,14 @@ extern const u32 nfs41_maxwrite_overhead;
213extern struct rpc_procinfo nfs4_procedures[]; 216extern struct rpc_procinfo nfs4_procedures[];
214#endif 217#endif
215 218
219extern int nfs4_init_ds_session(struct nfs_client *clp);
220
216/* proc.c */ 221/* proc.c */
217void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 222void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
223extern int nfs_init_client(struct nfs_client *clp,
224 const struct rpc_timeout *timeparms,
225 const char *ip_addr, rpc_authflavor_t authflavour,
226 int noresvport);
218 227
219/* dir.c */ 228/* dir.c */
220extern int nfs_access_cache_shrinker(struct shrinker *shrink, 229extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -247,24 +256,30 @@ extern void nfs_sb_active(struct super_block *sb);
247extern void nfs_sb_deactive(struct super_block *sb); 256extern void nfs_sb_deactive(struct super_block *sb);
248 257
249/* namespace.c */ 258/* namespace.c */
250extern char *nfs_path(const char *base, 259extern char *nfs_path(char **p, struct dentry *dentry,
251 const struct dentry *droot,
252 const struct dentry *dentry,
253 char *buffer, ssize_t buflen); 260 char *buffer, ssize_t buflen);
254extern struct vfsmount *nfs_d_automount(struct path *path); 261extern struct vfsmount *nfs_d_automount(struct path *path);
255 262
256/* getroot.c */ 263/* getroot.c */
257extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); 264extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
265 const char *);
258#ifdef CONFIG_NFS_V4 266#ifdef CONFIG_NFS_V4
259extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); 267extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
268 const char *);
260 269
261extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 270extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
262#endif 271#endif
263 272
264/* read.c */ 273/* read.c */
274extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
275 const struct rpc_call_ops *call_ops);
265extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 276extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
266 277
267/* write.c */ 278/* write.c */
279extern int nfs_initiate_write(struct nfs_write_data *data,
280 struct rpc_clnt *clnt,
281 const struct rpc_call_ops *call_ops,
282 int how);
268extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
269#ifdef CONFIG_MIGRATION 284#ifdef CONFIG_MIGRATION
270extern int nfs_migrate_page(struct address_space *, 285extern int nfs_migrate_page(struct address_space *,
@@ -274,6 +289,13 @@ extern int nfs_migrate_page(struct address_space *,
274#endif 289#endif
275 290
276/* nfs4proc.c */ 291/* nfs4proc.c */
292extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
293extern int nfs4_init_client(struct nfs_client *clp,
294 const struct rpc_timeout *timeparms,
295 const char *ip_addr,
296 rpc_authflavor_t authflavour,
297 int noresvport);
298extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
277extern int _nfs4_call_sync(struct nfs_server *server, 299extern int _nfs4_call_sync(struct nfs_server *server,
278 struct rpc_message *msg, 300 struct rpc_message *msg,
279 struct nfs4_sequence_args *args, 301 struct nfs4_sequence_args *args,
@@ -288,12 +310,11 @@ extern int _nfs4_call_sync_session(struct nfs_server *server,
288/* 310/*
289 * Determine the device name as a string 311 * Determine the device name as a string
290 */ 312 */
291static inline char *nfs_devname(const struct vfsmount *mnt_parent, 313static inline char *nfs_devname(struct dentry *dentry,
292 const struct dentry *dentry,
293 char *buffer, ssize_t buflen) 314 char *buffer, ssize_t buflen)
294{ 315{
295 return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, 316 char *dummy;
296 dentry, buffer, buflen); 317 return nfs_path(&dummy, dentry, buffer, buflen);
297} 318}
298 319
299/* 320/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f32b8603dca8..bf1c68009ffd 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -25,33 +25,30 @@ static LIST_HEAD(nfs_automount_list);
25static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 25static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
26int nfs_mountpoint_expiry_timeout = 500 * HZ; 26int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 27
28static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 28static struct vfsmount *nfs_do_submount(struct dentry *dentry,
29 const struct dentry *dentry,
30 struct nfs_fh *fh, 29 struct nfs_fh *fh,
31 struct nfs_fattr *fattr); 30 struct nfs_fattr *fattr);
32 31
33/* 32/*
34 * nfs_path - reconstruct the path given an arbitrary dentry 33 * nfs_path - reconstruct the path given an arbitrary dentry
35 * @base - arbitrary string to prepend to the path 34 * @base - used to return pointer to the end of devname part of path
36 * @droot - pointer to root dentry for mountpoint
37 * @dentry - pointer to dentry 35 * @dentry - pointer to dentry
38 * @buffer - result buffer 36 * @buffer - result buffer
39 * @buflen - length of buffer 37 * @buflen - length of buffer
40 * 38 *
41 * Helper function for constructing the path from the 39 * Helper function for constructing the server pathname
42 * root dentry to an arbitrary hashed dentry. 40 * by arbitrary hashed dentry.
43 * 41 *
44 * This is mainly for use in figuring out the path on the 42 * This is mainly for use in figuring out the path on the
45 * server side when automounting on top of an existing partition. 43 * server side when automounting on top of an existing partition
44 * and in generating /proc/mounts and friends.
46 */ 45 */
47char *nfs_path(const char *base, 46char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
48 const struct dentry *droot,
49 const struct dentry *dentry,
50 char *buffer, ssize_t buflen)
51{ 47{
52 char *end; 48 char *end;
53 int namelen; 49 int namelen;
54 unsigned seq; 50 unsigned seq;
51 const char *base;
55 52
56rename_retry: 53rename_retry:
57 end = buffer+buflen; 54 end = buffer+buflen;
@@ -60,7 +57,10 @@ rename_retry:
60 57
61 seq = read_seqbegin(&rename_lock); 58 seq = read_seqbegin(&rename_lock);
62 rcu_read_lock(); 59 rcu_read_lock();
63 while (!IS_ROOT(dentry) && dentry != droot) { 60 while (1) {
61 spin_lock(&dentry->d_lock);
62 if (IS_ROOT(dentry))
63 break;
64 namelen = dentry->d_name.len; 64 namelen = dentry->d_name.len;
65 buflen -= namelen + 1; 65 buflen -= namelen + 1;
66 if (buflen < 0) 66 if (buflen < 0)
@@ -68,27 +68,47 @@ rename_retry:
68 end -= namelen; 68 end -= namelen;
69 memcpy(end, dentry->d_name.name, namelen); 69 memcpy(end, dentry->d_name.name, namelen);
70 *--end = '/'; 70 *--end = '/';
71 spin_unlock(&dentry->d_lock);
71 dentry = dentry->d_parent; 72 dentry = dentry->d_parent;
72 } 73 }
73 rcu_read_unlock(); 74 if (read_seqretry(&rename_lock, seq)) {
74 if (read_seqretry(&rename_lock, seq)) 75 spin_unlock(&dentry->d_lock);
76 rcu_read_unlock();
75 goto rename_retry; 77 goto rename_retry;
78 }
76 if (*end != '/') { 79 if (*end != '/') {
77 if (--buflen < 0) 80 if (--buflen < 0) {
81 spin_unlock(&dentry->d_lock);
82 rcu_read_unlock();
78 goto Elong; 83 goto Elong;
84 }
79 *--end = '/'; 85 *--end = '/';
80 } 86 }
87 *p = end;
88 base = dentry->d_fsdata;
89 if (!base) {
90 spin_unlock(&dentry->d_lock);
91 rcu_read_unlock();
92 WARN_ON(1);
93 return end;
94 }
81 namelen = strlen(base); 95 namelen = strlen(base);
82 /* Strip off excess slashes in base string */ 96 /* Strip off excess slashes in base string */
83 while (namelen > 0 && base[namelen - 1] == '/') 97 while (namelen > 0 && base[namelen - 1] == '/')
84 namelen--; 98 namelen--;
85 buflen -= namelen; 99 buflen -= namelen;
86 if (buflen < 0) 100 if (buflen < 0) {
101 spin_unlock(&dentry->d_lock);
102 rcu_read_unlock();
87 goto Elong; 103 goto Elong;
104 }
88 end -= namelen; 105 end -= namelen;
89 memcpy(end, base, namelen); 106 memcpy(end, base, namelen);
107 spin_unlock(&dentry->d_lock);
108 rcu_read_unlock();
90 return end; 109 return end;
91Elong_unlock: 110Elong_unlock:
111 spin_unlock(&dentry->d_lock);
92 rcu_read_unlock(); 112 rcu_read_unlock();
93 if (read_seqretry(&rename_lock, seq)) 113 if (read_seqretry(&rename_lock, seq))
94 goto rename_retry; 114 goto rename_retry;
@@ -143,9 +163,9 @@ struct vfsmount *nfs_d_automount(struct path *path)
143 } 163 }
144 164
145 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 165 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
146 mnt = nfs_do_refmount(path->mnt, path->dentry); 166 mnt = nfs_do_refmount(path->dentry);
147 else 167 else
148 mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr); 168 mnt = nfs_do_submount(path->dentry, fh, fattr);
149 if (IS_ERR(mnt)) 169 if (IS_ERR(mnt))
150 goto out; 170 goto out;
151 171
@@ -209,19 +229,17 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
209 229
210/** 230/**
211 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary 231 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
212 * @mnt_parent - mountpoint of parent directory
213 * @dentry - parent directory 232 * @dentry - parent directory
214 * @fh - filehandle for new root dentry 233 * @fh - filehandle for new root dentry
215 * @fattr - attributes for new root inode 234 * @fattr - attributes for new root inode
216 * 235 *
217 */ 236 */
218static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 237static struct vfsmount *nfs_do_submount(struct dentry *dentry,
219 const struct dentry *dentry,
220 struct nfs_fh *fh, 238 struct nfs_fh *fh,
221 struct nfs_fattr *fattr) 239 struct nfs_fattr *fattr)
222{ 240{
223 struct nfs_clone_mount mountdata = { 241 struct nfs_clone_mount mountdata = {
224 .sb = mnt_parent->mnt_sb, 242 .sb = dentry->d_sb,
225 .dentry = dentry, 243 .dentry = dentry,
226 .fh = fh, 244 .fh = fh,
227 .fattr = fattr, 245 .fattr = fattr,
@@ -237,11 +255,11 @@ static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
237 dentry->d_name.name); 255 dentry->d_name.name);
238 if (page == NULL) 256 if (page == NULL)
239 goto out; 257 goto out;
240 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); 258 devname = nfs_devname(dentry, page, PAGE_SIZE);
241 mnt = (struct vfsmount *)devname; 259 mnt = (struct vfsmount *)devname;
242 if (IS_ERR(devname)) 260 if (IS_ERR(devname))
243 goto free_page; 261 goto free_page;
244 mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); 262 mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
245free_page: 263free_page:
246 free_page((unsigned long)page); 264 free_page((unsigned long)page);
247out: 265out:
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ce939c062a52..d0c80d8b3f96 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,4 +885,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
885 .lock = nfs3_proc_lock, 885 .lock = nfs3_proc_lock,
886 .clear_acl_cache = nfs3_forget_cached_acls, 886 .clear_acl_cache = nfs3_forget_cached_acls,
887 .close_context = nfs_close_context, 887 .close_context = nfs_close_context,
888 .init_client = nfs_init_client,
888}; 889};
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..c64be1cff080 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -252,6 +252,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
252extern int nfs4_setup_sequence(const struct nfs_server *server, 252extern int nfs4_setup_sequence(const struct nfs_server *server,
253 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 253 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
254 int cache_reply, struct rpc_task *task); 254 int cache_reply, struct rpc_task *task);
255extern int nfs41_setup_sequence(struct nfs4_session *session,
256 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
257 int cache_reply, struct rpc_task *task);
255extern void nfs4_destroy_session(struct nfs4_session *session); 258extern void nfs4_destroy_session(struct nfs4_session *session);
256extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 259extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
257extern int nfs4_proc_create_session(struct nfs_client *); 260extern int nfs4_proc_create_session(struct nfs_client *);
@@ -259,6 +262,19 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
259extern int nfs4_init_session(struct nfs_server *server); 262extern int nfs4_init_session(struct nfs_server *server);
260extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 263extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
261 struct nfs_fsinfo *fsinfo); 264 struct nfs_fsinfo *fsinfo);
265
266static inline bool
267is_ds_only_client(struct nfs_client *clp)
268{
269 return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) ==
270 EXCHGID4_FLAG_USE_PNFS_DS;
271}
272
273static inline bool
274is_ds_client(struct nfs_client *clp)
275{
276 return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
277}
262#else /* CONFIG_NFS_v4_1 */ 278#else /* CONFIG_NFS_v4_1 */
263static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 279static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
264{ 280{
@@ -276,6 +292,18 @@ static inline int nfs4_init_session(struct nfs_server *server)
276{ 292{
277 return 0; 293 return 0;
278} 294}
295
296static inline bool
297is_ds_only_client(struct nfs_client *clp)
298{
299 return false;
300}
301
302static inline bool
303is_ds_client(struct nfs_client *clp)
304{
305 return false;
306}
279#endif /* CONFIG_NFS_V4_1 */ 307#endif /* CONFIG_NFS_V4_1 */
280 308
281extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; 309extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
@@ -298,6 +326,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 326#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 327struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 328struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
329extern void nfs4_schedule_session_recovery(struct nfs4_session *);
330#else
331static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
332{
333}
301#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
302 335
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 336extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +340,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 340extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 341extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 342extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 343extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 344extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 345extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 346extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 347extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 348extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 23f930caf1e2..428558464817 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -40,32 +40,309 @@ MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); 40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver"); 41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42 42
43static int 43#define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
44filelayout_set_layoutdriver(struct nfs_server *nfss) 44
45static loff_t
46filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
47 loff_t offset)
45{ 48{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, 49 u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
47 nfs4_fl_free_deviceid_callback); 50 u64 tmp;
48 if (status) { 51
49 printk(KERN_WARNING "%s: deviceid cache could not be " 52 offset -= flseg->pattern_offset;
50 "initialized\n", __func__); 53 tmp = offset;
51 return status; 54 do_div(tmp, stripe_width);
55
56 return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit);
57}
58
59/* This function is used by the layout driver to calculate the
60 * offset of the file on the dserver based on whether the
61 * layout type is STRIPE_DENSE or STRIPE_SPARSE
62 */
63static loff_t
64filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
65{
66 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
67
68 switch (flseg->stripe_type) {
69 case STRIPE_SPARSE:
70 return offset;
71
72 case STRIPE_DENSE:
73 return filelayout_get_dense_offset(flseg, offset);
52 } 74 }
53 dprintk("%s: deviceid cache has been initialized successfully\n", 75
54 __func__); 76 BUG();
77}
78
79/* For data server errors we don't recover from */
80static void
81filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
82{
83 if (lseg->pls_range.iomode == IOMODE_RW) {
84 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
85 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
86 } else {
87 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
88 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
89 }
90}
91
92static int filelayout_async_handle_error(struct rpc_task *task,
93 struct nfs4_state *state,
94 struct nfs_client *clp,
95 int *reset)
96{
97 if (task->tk_status >= 0)
98 return 0;
99
100 *reset = 0;
101
102 switch (task->tk_status) {
103 case -NFS4ERR_BADSESSION:
104 case -NFS4ERR_BADSLOT:
105 case -NFS4ERR_BAD_HIGH_SLOT:
106 case -NFS4ERR_DEADSESSION:
107 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
108 case -NFS4ERR_SEQ_FALSE_RETRY:
109 case -NFS4ERR_SEQ_MISORDERED:
110 dprintk("%s ERROR %d, Reset session. Exchangeid "
111 "flags 0x%x\n", __func__, task->tk_status,
112 clp->cl_exchange_flags);
113 nfs4_schedule_session_recovery(clp->cl_session);
114 break;
115 case -NFS4ERR_DELAY:
116 case -NFS4ERR_GRACE:
117 case -EKEYEXPIRED:
118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
119 break;
120 default:
121 dprintk("%s DS error. Retry through MDS %d\n", __func__,
122 task->tk_status);
123 *reset = 1;
124 break;
125 }
126 task->tk_status = 0;
127 return -EAGAIN;
128}
129
130/* NFS_PROTO call done callback routines */
131
132static int filelayout_read_done_cb(struct rpc_task *task,
133 struct nfs_read_data *data)
134{
135 struct nfs_client *clp = data->ds_clp;
136 int reset = 0;
137
138 dprintk("%s DS read\n", __func__);
139
140 if (filelayout_async_handle_error(task, data->args.context->state,
141 data->ds_clp, &reset) == -EAGAIN) {
142 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
143 __func__, data->ds_clp, data->ds_clp->cl_session);
144 if (reset) {
145 filelayout_set_lo_fail(data->lseg);
146 nfs4_reset_read(task, data);
147 clp = NFS_SERVER(data->inode)->nfs_client;
148 }
149 nfs_restart_rpc(task, clp);
150 return -EAGAIN;
151 }
152
55 return 0; 153 return 0;
56} 154}
57 155
58/* Clear out the layout by destroying its device list */ 156/*
59static int 157 * Call ops for the async read/write cases
60filelayout_clear_layoutdriver(struct nfs_server *nfss) 158 * In the case of dense layouts, the offset needs to be reset to its
159 * original value.
160 */
161static void filelayout_read_prepare(struct rpc_task *task, void *data)
61{ 162{
62 dprintk("--> %s\n", __func__); 163 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
164
165 rdata->read_done_cb = filelayout_read_done_cb;
166
167 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
168 &rdata->args.seq_args, &rdata->res.seq_res,
169 0, task))
170 return;
171
172 rpc_call_start(task);
173}
174
175static void filelayout_read_call_done(struct rpc_task *task, void *data)
176{
177 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
178
179 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
180
181 /* Note this may cause RPC to be resent */
182 rdata->mds_ops->rpc_call_done(task, data);
183}
184
185static void filelayout_read_release(void *data)
186{
187 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
188
189 rdata->mds_ops->rpc_release(data);
190}
191
192static int filelayout_write_done_cb(struct rpc_task *task,
193 struct nfs_write_data *data)
194{
195 int reset = 0;
196
197 if (filelayout_async_handle_error(task, data->args.context->state,
198 data->ds_clp, &reset) == -EAGAIN) {
199 struct nfs_client *clp;
200
201 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
202 __func__, data->ds_clp, data->ds_clp->cl_session);
203 if (reset) {
204 filelayout_set_lo_fail(data->lseg);
205 nfs4_reset_write(task, data);
206 clp = NFS_SERVER(data->inode)->nfs_client;
207 } else
208 clp = data->ds_clp;
209 nfs_restart_rpc(task, clp);
210 return -EAGAIN;
211 }
63 212
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0; 213 return 0;
67} 214}
68 215
216static void filelayout_write_prepare(struct rpc_task *task, void *data)
217{
218 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
219
220 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
221 &wdata->args.seq_args, &wdata->res.seq_res,
222 0, task))
223 return;
224
225 rpc_call_start(task);
226}
227
228static void filelayout_write_call_done(struct rpc_task *task, void *data)
229{
230 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
231
232 /* Note this may cause RPC to be resent */
233 wdata->mds_ops->rpc_call_done(task, data);
234}
235
236static void filelayout_write_release(void *data)
237{
238 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
239
240 wdata->mds_ops->rpc_release(data);
241}
242
243struct rpc_call_ops filelayout_read_call_ops = {
244 .rpc_call_prepare = filelayout_read_prepare,
245 .rpc_call_done = filelayout_read_call_done,
246 .rpc_release = filelayout_read_release,
247};
248
249struct rpc_call_ops filelayout_write_call_ops = {
250 .rpc_call_prepare = filelayout_write_prepare,
251 .rpc_call_done = filelayout_write_call_done,
252 .rpc_release = filelayout_write_release,
253};
254
255static enum pnfs_try_status
256filelayout_read_pagelist(struct nfs_read_data *data)
257{
258 struct pnfs_layout_segment *lseg = data->lseg;
259 struct nfs4_pnfs_ds *ds;
260 loff_t offset = data->args.offset;
261 u32 j, idx;
262 struct nfs_fh *fh;
263 int status;
264
265 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
266 __func__, data->inode->i_ino,
267 data->args.pgbase, (size_t)data->args.count, offset);
268
269 /* Retrieve the correct rpc_client for the byte range */
270 j = nfs4_fl_calc_j_index(lseg, offset);
271 idx = nfs4_fl_calc_ds_index(lseg, j);
272 ds = nfs4_fl_prepare_ds(lseg, idx);
273 if (!ds) {
274 /* Either layout fh index faulty, or ds connect failed */
275 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
276 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
277 return PNFS_NOT_ATTEMPTED;
278 }
279 dprintk("%s USE DS:ip %x %hu\n", __func__,
280 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
281
282 /* No multipath support. Use first DS */
283 data->ds_clp = ds->ds_clp;
284 fh = nfs4_fl_select_ds_fh(lseg, j);
285 if (fh)
286 data->args.fh = fh;
287
288 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
289 data->mds_offset = offset;
290
291 /* Perform an asynchronous read to ds */
292 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
293 &filelayout_read_call_ops);
294 BUG_ON(status != 0);
295 return PNFS_ATTEMPTED;
296}
297
298/* Perform async writes. */
299static enum pnfs_try_status
300filelayout_write_pagelist(struct nfs_write_data *data, int sync)
301{
302 struct pnfs_layout_segment *lseg = data->lseg;
303 struct nfs4_pnfs_ds *ds;
304 loff_t offset = data->args.offset;
305 u32 j, idx;
306 struct nfs_fh *fh;
307 int status;
308
309 /* Retrieve the correct rpc_client for the byte range */
310 j = nfs4_fl_calc_j_index(lseg, offset);
311 idx = nfs4_fl_calc_ds_index(lseg, j);
312 ds = nfs4_fl_prepare_ds(lseg, idx);
313 if (!ds) {
314 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
315 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
316 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
317 return PNFS_NOT_ATTEMPTED;
318 }
319 dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
320 data->inode->i_ino, sync, (size_t) data->args.count, offset,
321 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
322
323 /* We can't handle commit to ds yet */
324 if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
325 data->args.stable = NFS_FILE_SYNC;
326
327 data->write_done_cb = filelayout_write_done_cb;
328 data->ds_clp = ds->ds_clp;
329 fh = nfs4_fl_select_ds_fh(lseg, j);
330 if (fh)
331 data->args.fh = fh;
332 /*
333 * Get the file offset on the dserver. Set the write offset to
334 * this offset and save the original offset.
335 */
336 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
337 data->mds_offset = offset;
338
339 /* Perform an asynchronous write */
340 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
341 &filelayout_write_call_ops, sync);
342 BUG_ON(status != 0);
343 return PNFS_ATTEMPTED;
344}
345
69/* 346/*
70 * filelayout_check_layout() 347 * filelayout_check_layout()
71 * 348 *
@@ -92,14 +369,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
92 goto out; 369 goto out;
93 } 370 }
94 371
95 if (fl->stripe_unit % PAGE_SIZE) { 372 if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n", 373 dprintk("%s Invalid stripe unit (%u)\n",
97 __func__, fl->stripe_unit); 374 __func__, fl->stripe_unit);
98 goto out; 375 goto out;
99 } 376 }
100 377
101 /* find and reference the deviceid */ 378 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); 379 dsaddr = nfs4_fl_find_get_deviceid(id);
103 if (dsaddr == NULL) { 380 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->plh_inode, id); 381 dsaddr = get_device_info(lo->plh_inode, id);
105 if (dsaddr == NULL) 382 if (dsaddr == NULL)
@@ -134,7 +411,7 @@ out:
134 dprintk("--> %s returns %d\n", __func__, status); 411 dprintk("--> %s returns %d\n", __func__, status);
135 return status; 412 return status;
136out_put: 413out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); 414 nfs4_fl_put_deviceid(dsaddr);
138 goto out; 415 goto out;
139} 416}
140 417
@@ -243,23 +520,47 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
243static void 520static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg) 521filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{ 522{
246 struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 523 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248 524
249 dprintk("--> %s\n", __func__); 525 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, 526 nfs4_fl_put_deviceid(fl->dsaddr);
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl); 527 _filelayout_free_lseg(fl);
253} 528}
254 529
530/*
531 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
532 *
533 * return 1 : coalesce page
534 * return 0 : don't coalesce page
535 */
536int
537filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
538 struct nfs_page *req)
539{
540 u64 p_stripe, r_stripe;
541 u32 stripe_unit;
542
543 if (!pgio->pg_lseg)
544 return 1;
545 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
546 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
547 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
548
549 do_div(p_stripe, stripe_unit);
550 do_div(r_stripe, stripe_unit);
551
552 return (p_stripe == r_stripe);
553}
554
255static struct pnfs_layoutdriver_type filelayout_type = { 555static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES, 556 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES", 557 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE, 558 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver, 559 .alloc_lseg = filelayout_alloc_lseg,
260 .clear_layoutdriver = filelayout_clear_layoutdriver, 560 .free_lseg = filelayout_free_lseg,
261 .alloc_lseg = filelayout_alloc_lseg, 561 .pg_test = filelayout_pg_test,
262 .free_lseg = filelayout_free_lseg, 562 .read_pagelist = filelayout_read_pagelist,
563 .write_pagelist = filelayout_write_pagelist,
263}; 564};
264 565
265static int __init nfs4filelayout_init(void) 566static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index bbf60dd2ab9d..ee0c907742b5 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -55,8 +55,14 @@ struct nfs4_pnfs_ds {
55 atomic_t ds_count; 55 atomic_t ds_count;
56}; 56};
57 57
58/* nfs4_file_layout_dsaddr flags */
59#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
60
58struct nfs4_file_layout_dsaddr { 61struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid; 62 struct hlist_node node;
63 struct nfs4_deviceid deviceid;
64 atomic_t ref;
65 unsigned long flags;
60 u32 stripe_count; 66 u32 stripe_count;
61 u8 *stripe_indices; 67 u8 *stripe_indices;
62 u32 ds_num; 68 u32 ds_num;
@@ -83,11 +89,18 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
83 generic_hdr); 89 generic_hdr);
84} 90}
85 91
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); 92extern struct nfs_fh *
93nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
94
87extern void print_ds(struct nfs4_pnfs_ds *ds); 95extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id); 96extern void print_deviceid(struct nfs4_deviceid *dev_id);
97u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
98u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
99struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
100 u32 ds_idx);
89extern struct nfs4_file_layout_dsaddr * 101extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); 102nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
103extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
91struct nfs4_file_layout_dsaddr * 104struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); 105get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93 106
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..68143c162e3b 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -37,6 +37,30 @@
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 38
39/* 39/*
40 * Device ID RCU cache. A device ID is unique per client ID and layout type.
41 */
42#define NFS4_FL_DEVICE_ID_HASH_BITS 5
43#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
44#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
45
46static inline u32
47nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
48{
49 unsigned char *cptr = (unsigned char *)id->data;
50 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
51 u32 x = 0;
52
53 while (nbytes--) {
54 x *= 37;
55 x += *cptr++;
56 }
57 return x & NFS4_FL_DEVICE_ID_HASH_MASK;
58}
59
60static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
61static DEFINE_SPINLOCK(filelayout_deviceid_lock);
62
63/*
40 * Data server cache 64 * Data server cache
41 * 65 *
42 * Data servers can be mapped to different device ids. 66 * Data servers can be mapped to different device ids.
@@ -104,6 +128,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port)
104 return NULL; 128 return NULL;
105} 129}
106 130
131/*
132 * Create an rpc connection to the nfs4_pnfs_ds data server
133 * Currently only support IPv4
134 */
135static int
136nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
137{
138 struct nfs_client *clp;
139 struct sockaddr_in sin;
140 int status = 0;
141
142 dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
143 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
144 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
145
146 sin.sin_family = AF_INET;
147 sin.sin_addr.s_addr = ds->ds_ip_addr;
148 sin.sin_port = ds->ds_port;
149
150 clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
151 sizeof(sin), IPPROTO_TCP);
152 if (IS_ERR(clp)) {
153 status = PTR_ERR(clp);
154 goto out;
155 }
156
157 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
158 if (!is_ds_client(clp)) {
159 status = -ENODEV;
160 goto out_put;
161 }
162 ds->ds_clp = clp;
163 dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
164 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
165 goto out;
166 }
167
168 /*
169 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
170 * be equal to the MDS lease. Renewal is scheduled in create_session.
171 */
172 spin_lock(&mds_srv->nfs_client->cl_lock);
173 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
174 spin_unlock(&mds_srv->nfs_client->cl_lock);
175 clp->cl_last_renewal = jiffies;
176
177 /* New nfs_client */
178 status = nfs4_init_ds_session(clp);
179 if (status)
180 goto out_put;
181
182 ds->ds_clp = clp;
183 dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
184 ntohs(ds->ds_port));
185out:
186 return status;
187out_put:
188 nfs_put_client(clp);
189 goto out;
190}
191
107static void 192static void
108destroy_ds(struct nfs4_pnfs_ds *ds) 193destroy_ds(struct nfs4_pnfs_ds *ds)
109{ 194{
@@ -122,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
122 struct nfs4_pnfs_ds *ds; 207 struct nfs4_pnfs_ds *ds;
123 int i; 208 int i;
124 209
125 print_deviceid(&dsaddr->deviceid.de_id); 210 print_deviceid(&dsaddr->deviceid);
126 211
127 for (i = 0; i < dsaddr->ds_num; i++) { 212 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i]; 213 ds = dsaddr->ds_list[i];
@@ -139,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
139 kfree(dsaddr); 224 kfree(dsaddr);
140} 225}
141 226
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds * 227static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) 228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{ 229{
@@ -219,6 +295,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 295 goto out_err;
220 } 296 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 297 buf = kmalloc(rlen + 1, GFP_KERNEL);
298 if (!buf) {
299 dprintk("%s: Not enough memory\n", __func__);
300 goto out_err;
301 }
222 buf[rlen] = '\0'; 302 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 303 memcpy(buf, r_addr, rlen);
224 304
@@ -296,7 +376,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
296 dsaddr->stripe_count = cnt; 376 dsaddr->stripe_count = cnt;
297 dsaddr->ds_num = num; 377 dsaddr->ds_num = num;
298 378
299 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); 379 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
300 380
301 /* Go back an read stripe indices */ 381 /* Go back an read stripe indices */
302 p = indicesp; 382 p = indicesp;
@@ -346,28 +426,37 @@ out_err:
346} 426}
347 427
348/* 428/*
349 * Decode the opaque device specified in 'dev' 429 * Decode the opaque device specified in 'dev' and add it to the cache of
350 * and add it to the list of available devices. 430 * available devices.
351 * If the deviceid is already cached, nfs4_add_deviceid will return
352 * a pointer to the cached struct and throw away the new.
353 */ 431 */
354static struct nfs4_file_layout_dsaddr* 432static struct nfs4_file_layout_dsaddr *
355decode_and_add_device(struct inode *inode, struct pnfs_device *dev) 433decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
356{ 434{
357 struct nfs4_file_layout_dsaddr *dsaddr; 435 struct nfs4_file_layout_dsaddr *d, *new;
358 struct pnfs_deviceid_node *d; 436 long hash;
359 437
360 dsaddr = decode_device(inode, dev); 438 new = decode_device(inode, dev);
361 if (!dsaddr) { 439 if (!new) {
362 printk(KERN_WARNING "%s: Could not decode or add device\n", 440 printk(KERN_WARNING "%s: Could not decode or add device\n",
363 __func__); 441 __func__);
364 return NULL; 442 return NULL;
365 } 443 }
366 444
367 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, 445 spin_lock(&filelayout_deviceid_lock);
368 &dsaddr->deviceid); 446 d = nfs4_fl_find_get_deviceid(&new->deviceid);
447 if (d) {
448 spin_unlock(&filelayout_deviceid_lock);
449 nfs4_fl_free_deviceid(new);
450 return d;
451 }
452
453 INIT_HLIST_NODE(&new->node);
454 atomic_set(&new->ref, 1);
455 hash = nfs4_fl_deviceid_hash(&new->deviceid);
456 hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
457 spin_unlock(&filelayout_deviceid_lock);
369 458
370 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 459 return new;
371} 460}
372 461
373/* 462/*
@@ -442,12 +531,123 @@ out_free:
442 return dsaddr; 531 return dsaddr;
443} 532}
444 533
534void
535nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
536{
537 if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
538 hlist_del_rcu(&dsaddr->node);
539 spin_unlock(&filelayout_deviceid_lock);
540
541 synchronize_rcu();
542 nfs4_fl_free_deviceid(dsaddr);
543 }
544}
545
445struct nfs4_file_layout_dsaddr * 546struct nfs4_file_layout_dsaddr *
446nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) 547nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
548{
549 struct nfs4_file_layout_dsaddr *d;
550 struct hlist_node *n;
551 long hash = nfs4_fl_deviceid_hash(id);
552
553
554 rcu_read_lock();
555 hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
556 if (!memcmp(&d->deviceid, id, sizeof(*id))) {
557 if (!atomic_inc_not_zero(&d->ref))
558 goto fail;
559 rcu_read_unlock();
560 return d;
561 }
562 }
563fail:
564 rcu_read_unlock();
565 return NULL;
566}
567
568/*
569 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
570 * Then: ((res + fsi) % dsaddr->stripe_count)
571 */
572u32
573nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
574{
575 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
576 u64 tmp;
577
578 tmp = offset - flseg->pattern_offset;
579 do_div(tmp, flseg->stripe_unit);
580 tmp += flseg->first_stripe_index;
581 return do_div(tmp, flseg->dsaddr->stripe_count);
582}
583
584u32
585nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
447{ 586{
448 struct pnfs_deviceid_node *d; 587 return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
588}
449 589
450 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); 590struct nfs_fh *
451 return (d == NULL) ? NULL : 591nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
452 container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 592{
593 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
594 u32 i;
595
596 if (flseg->stripe_type == STRIPE_SPARSE) {
597 if (flseg->num_fh == 1)
598 i = 0;
599 else if (flseg->num_fh == 0)
600 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
601 return NULL;
602 else
603 i = nfs4_fl_calc_ds_index(lseg, j);
604 } else
605 i = j;
606 return flseg->fh_array[i];
607}
608
609static void
610filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
611 int err, u32 ds_addr)
612{
613 u32 *p = (u32 *)&dsaddr->deviceid;
614
615 printk(KERN_ERR "NFS: data server %x connection error %d."
616 " Deviceid [%x%x%x%x] marked out of use.\n",
617 ds_addr, err, p[0], p[1], p[2], p[3]);
618
619 spin_lock(&filelayout_deviceid_lock);
620 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
621 spin_unlock(&filelayout_deviceid_lock);
622}
623
624struct nfs4_pnfs_ds *
625nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
626{
627 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
628 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
629
630 if (ds == NULL) {
631 printk(KERN_ERR "%s: No data server for offset index %d\n",
632 __func__, ds_idx);
633 return NULL;
634 }
635
636 if (!ds->ds_clp) {
637 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
638 int err;
639
640 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
641 /* Already tried to connect, don't try again */
642 dprintk("%s Deviceid marked out of use\n", __func__);
643 return NULL;
644 }
645 err = nfs4_ds_connect(s, ds);
646 if (err) {
647 filelayout_mark_devid_negative(dsaddr, err,
648 ntohl(ds->ds_ip_addr));
649 return NULL;
650 }
651 }
652 return ds;
453} 653}
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3c2a1724fbd2..bb80c49b6533 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -54,33 +54,29 @@ Elong:
54/* 54/*
55 * Determine the mount path as a string 55 * Determine the mount path as a string
56 */ 56 */
57static char *nfs4_path(const struct vfsmount *mnt_parent, 57static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
58 const struct dentry *dentry,
59 char *buffer, ssize_t buflen)
60{ 58{
61 const char *srvpath; 59 char *limit;
62 60 char *path = nfs_path(&limit, dentry, buffer, buflen);
63 srvpath = strchr(mnt_parent->mnt_devname, ':'); 61 if (!IS_ERR(path)) {
64 if (srvpath) 62 char *colon = strchr(path, ':');
65 srvpath++; 63 if (colon && colon < limit)
66 else 64 path = colon + 1;
67 srvpath = mnt_parent->mnt_devname; 65 }
68 66 return path;
69 return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
70} 67}
71 68
72/* 69/*
73 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we 70 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
74 * believe to be the server path to this dentry 71 * believe to be the server path to this dentry
75 */ 72 */
76static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, 73static int nfs4_validate_fspath(struct dentry *dentry,
77 const struct dentry *dentry,
78 const struct nfs4_fs_locations *locations, 74 const struct nfs4_fs_locations *locations,
79 char *page, char *page2) 75 char *page, char *page2)
80{ 76{
81 const char *path, *fs_path; 77 const char *path, *fs_path;
82 78
83 path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); 79 path = nfs4_path(dentry, page, PAGE_SIZE);
84 if (IS_ERR(path)) 80 if (IS_ERR(path))
85 return PTR_ERR(path); 81 return PTR_ERR(path);
86 82
@@ -165,20 +161,18 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
165 161
166/** 162/**
167 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error 163 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
168 * @mnt_parent - mountpoint of parent directory
169 * @dentry - parent directory 164 * @dentry - parent directory
170 * @locations - array of NFSv4 server location information 165 * @locations - array of NFSv4 server location information
171 * 166 *
172 */ 167 */
173static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, 168static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
174 const struct dentry *dentry,
175 const struct nfs4_fs_locations *locations) 169 const struct nfs4_fs_locations *locations)
176{ 170{
177 struct vfsmount *mnt = ERR_PTR(-ENOENT); 171 struct vfsmount *mnt = ERR_PTR(-ENOENT);
178 struct nfs_clone_mount mountdata = { 172 struct nfs_clone_mount mountdata = {
179 .sb = mnt_parent->mnt_sb, 173 .sb = dentry->d_sb,
180 .dentry = dentry, 174 .dentry = dentry,
181 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, 175 .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
182 }; 176 };
183 char *page = NULL, *page2 = NULL; 177 char *page = NULL, *page2 = NULL;
184 int loc, error; 178 int loc, error;
@@ -198,7 +192,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
198 goto out; 192 goto out;
199 193
200 /* Ensure fs path is a prefix of current dentry path */ 194 /* Ensure fs path is a prefix of current dentry path */
201 error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); 195 error = nfs4_validate_fspath(dentry, locations, page, page2);
202 if (error < 0) { 196 if (error < 0) {
203 mnt = ERR_PTR(error); 197 mnt = ERR_PTR(error);
204 goto out; 198 goto out;
@@ -225,11 +219,10 @@ out:
225 219
226/* 220/*
227 * nfs_do_refmount - handle crossing a referral on server 221 * nfs_do_refmount - handle crossing a referral on server
228 * @mnt_parent - mountpoint of referral
229 * @dentry - dentry of referral 222 * @dentry - dentry of referral
230 * 223 *
231 */ 224 */
232struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 225struct vfsmount *nfs_do_refmount(struct dentry *dentry)
233{ 226{
234 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 227 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
235 struct dentry *parent; 228 struct dentry *parent;
@@ -262,7 +255,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
262 fs_locations->fs_path.ncomponents <= 0) 255 fs_locations->fs_path.ncomponents <= 0)
263 goto out_free; 256 goto out_free;
264 257
265 mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); 258 mnt = nfs_follow_referral(dentry, fs_locations);
266out_free: 259out_free:
267 __free_page(page); 260 __free_page(page);
268 kfree(fs_locations); 261 kfree(fs_locations);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..1d84e7088af9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -85,6 +85,9 @@ static int nfs4_map_errors(int err)
85 switch (err) { 85 switch (err) {
86 case -NFS4ERR_RESOURCE: 86 case -NFS4ERR_RESOURCE:
87 return -EREMOTEIO; 87 return -EREMOTEIO;
88 case -NFS4ERR_BADOWNER:
89 case -NFS4ERR_BADNAME:
90 return -EINVAL;
88 default: 91 default:
89 dprintk("%s could not handle NFSv4 error %d\n", 92 dprintk("%s could not handle NFSv4 error %d\n",
90 __func__, -err); 93 __func__, -err);
@@ -241,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
241/* This is the error handling routine for processes that are allowed 244/* This is the error handling routine for processes that are allowed
242 * to sleep. 245 * to sleep.
243 */ 246 */
244static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 247static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
245{ 248{
246 struct nfs_client *clp = server->nfs_client; 249 struct nfs_client *clp = server->nfs_client;
247 struct nfs4_state *state = exception->state; 250 struct nfs4_state *state = exception->state;
@@ -256,12 +259,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
256 case -NFS4ERR_OPENMODE: 259 case -NFS4ERR_OPENMODE:
257 if (state == NULL) 260 if (state == NULL)
258 break; 261 break;
259 nfs4_state_mark_reclaim_nograce(clp, state); 262 nfs4_schedule_stateid_recovery(server, state);
260 goto do_state_recovery; 263 goto wait_on_recovery;
261 case -NFS4ERR_STALE_STATEID: 264 case -NFS4ERR_STALE_STATEID:
262 case -NFS4ERR_STALE_CLIENTID: 265 case -NFS4ERR_STALE_CLIENTID:
263 case -NFS4ERR_EXPIRED: 266 case -NFS4ERR_EXPIRED:
264 goto do_state_recovery; 267 nfs4_schedule_lease_recovery(clp);
268 goto wait_on_recovery;
265#if defined(CONFIG_NFS_V4_1) 269#if defined(CONFIG_NFS_V4_1)
266 case -NFS4ERR_BADSESSION: 270 case -NFS4ERR_BADSESSION:
267 case -NFS4ERR_BADSLOT: 271 case -NFS4ERR_BADSLOT:
@@ -272,7 +276,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
272 case -NFS4ERR_SEQ_MISORDERED: 276 case -NFS4ERR_SEQ_MISORDERED:
273 dprintk("%s ERROR: %d Reset session\n", __func__, 277 dprintk("%s ERROR: %d Reset session\n", __func__,
274 errorcode); 278 errorcode);
275 nfs4_schedule_state_recovery(clp); 279 nfs4_schedule_session_recovery(clp->cl_session);
276 exception->retry = 1; 280 exception->retry = 1;
277 break; 281 break;
278#endif /* defined(CONFIG_NFS_V4_1) */ 282#endif /* defined(CONFIG_NFS_V4_1) */
@@ -292,11 +296,23 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
292 break; 296 break;
293 case -NFS4ERR_OLD_STATEID: 297 case -NFS4ERR_OLD_STATEID:
294 exception->retry = 1; 298 exception->retry = 1;
299 break;
300 case -NFS4ERR_BADOWNER:
301 /* The following works around a Linux server bug! */
302 case -NFS4ERR_BADNAME:
303 if (server->caps & NFS_CAP_UIDGID_NOMAP) {
304 server->caps &= ~NFS_CAP_UIDGID_NOMAP;
305 exception->retry = 1;
306 printk(KERN_WARNING "NFS: v4 server %s "
307 "does not accept raw "
308 "uid/gids. "
309 "Reenabling the idmapper.\n",
310 server->nfs_client->cl_hostname);
311 }
295 } 312 }
296 /* We failed to handle the error */ 313 /* We failed to handle the error */
297 return nfs4_map_errors(ret); 314 return nfs4_map_errors(ret);
298do_state_recovery: 315wait_on_recovery:
299 nfs4_schedule_state_recovery(clp);
300 ret = nfs4_wait_clnt_recover(clp); 316 ret = nfs4_wait_clnt_recover(clp);
301 if (ret == 0) 317 if (ret == 0)
302 exception->retry = 1; 318 exception->retry = 1;
@@ -435,8 +451,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
435 clp = res->sr_session->clp; 451 clp = res->sr_session->clp;
436 do_renew_lease(clp, timestamp); 452 do_renew_lease(clp, timestamp);
437 /* Check sequence flags */ 453 /* Check sequence flags */
438 if (atomic_read(&clp->cl_count) > 1) 454 if (res->sr_status_flags != 0)
439 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 455 nfs4_schedule_lease_recovery(clp);
440 break; 456 break;
441 case -NFS4ERR_DELAY: 457 case -NFS4ERR_DELAY:
442 /* The server detected a resend of the RPC call and 458 /* The server detected a resend of the RPC call and
@@ -505,7 +521,7 @@ out:
505 return ret_id; 521 return ret_id;
506} 522}
507 523
508static int nfs41_setup_sequence(struct nfs4_session *session, 524int nfs41_setup_sequence(struct nfs4_session *session,
509 struct nfs4_sequence_args *args, 525 struct nfs4_sequence_args *args,
510 struct nfs4_sequence_res *res, 526 struct nfs4_sequence_res *res,
511 int cache_reply, 527 int cache_reply,
@@ -571,6 +587,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
571 res->sr_status = 1; 587 res->sr_status = 1;
572 return 0; 588 return 0;
573} 589}
590EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
574 591
575int nfs4_setup_sequence(const struct nfs_server *server, 592int nfs4_setup_sequence(const struct nfs_server *server,
576 struct nfs4_sequence_args *args, 593 struct nfs4_sequence_args *args,
@@ -1255,14 +1272,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1255 case -NFS4ERR_BAD_HIGH_SLOT: 1272 case -NFS4ERR_BAD_HIGH_SLOT:
1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1273 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1257 case -NFS4ERR_DEADSESSION: 1274 case -NFS4ERR_DEADSESSION:
1258 nfs4_schedule_state_recovery( 1275 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1259 server->nfs_client);
1260 goto out; 1276 goto out;
1261 case -NFS4ERR_STALE_CLIENTID: 1277 case -NFS4ERR_STALE_CLIENTID:
1262 case -NFS4ERR_STALE_STATEID: 1278 case -NFS4ERR_STALE_STATEID:
1263 case -NFS4ERR_EXPIRED: 1279 case -NFS4ERR_EXPIRED:
1264 /* Don't recall a delegation if it was lost */ 1280 /* Don't recall a delegation if it was lost */
1265 nfs4_schedule_state_recovery(server->nfs_client); 1281 nfs4_schedule_lease_recovery(server->nfs_client);
1266 goto out; 1282 goto out;
1267 case -ERESTARTSYS: 1283 case -ERESTARTSYS:
1268 /* 1284 /*
@@ -1271,7 +1287,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1271 */ 1287 */
1272 case -NFS4ERR_ADMIN_REVOKED: 1288 case -NFS4ERR_ADMIN_REVOKED:
1273 case -NFS4ERR_BAD_STATEID: 1289 case -NFS4ERR_BAD_STATEID:
1274 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1290 nfs4_schedule_stateid_recovery(server, state);
1275 case -EKEYEXPIRED: 1291 case -EKEYEXPIRED:
1276 /* 1292 /*
1277 * User RPCSEC_GSS context has expired. 1293 * User RPCSEC_GSS context has expired.
@@ -1574,9 +1590,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1574 return 0; 1590 return 0;
1575} 1591}
1576 1592
1577static int nfs4_recover_expired_lease(struct nfs_server *server) 1593static int nfs4_client_recover_expired_lease(struct nfs_client *clp)
1578{ 1594{
1579 struct nfs_client *clp = server->nfs_client;
1580 unsigned int loop; 1595 unsigned int loop;
1581 int ret; 1596 int ret;
1582 1597
@@ -1587,12 +1602,17 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1587 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1602 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1588 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1603 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1589 break; 1604 break;
1590 nfs4_schedule_state_recovery(clp); 1605 nfs4_schedule_state_manager(clp);
1591 ret = -EIO; 1606 ret = -EIO;
1592 } 1607 }
1593 return ret; 1608 return ret;
1594} 1609}
1595 1610
1611static int nfs4_recover_expired_lease(struct nfs_server *server)
1612{
1613 return nfs4_client_recover_expired_lease(server->nfs_client);
1614}
1615
1596/* 1616/*
1597 * OPEN_EXPIRED: 1617 * OPEN_EXPIRED:
1598 * reclaim state on the server after a network partition. 1618 * reclaim state on the server after a network partition.
@@ -3070,15 +3090,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3070 return err; 3090 return err;
3071} 3091}
3072 3092
3073static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 3093static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3074{ 3094{
3075 struct nfs_server *server = NFS_SERVER(data->inode); 3095 struct nfs_server *server = NFS_SERVER(data->inode);
3076 3096
3077 dprintk("--> %s\n", __func__);
3078
3079 if (!nfs4_sequence_done(task, &data->res.seq_res))
3080 return -EAGAIN;
3081
3082 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3097 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3083 nfs_restart_rpc(task, server->nfs_client); 3098 nfs_restart_rpc(task, server->nfs_client);
3084 return -EAGAIN; 3099 return -EAGAIN;
@@ -3090,19 +3105,44 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3090 return 0; 3105 return 0;
3091} 3106}
3092 3107
3108static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3109{
3110
3111 dprintk("--> %s\n", __func__);
3112
3113 if (!nfs4_sequence_done(task, &data->res.seq_res))
3114 return -EAGAIN;
3115
3116 return data->read_done_cb(task, data);
3117}
3118
3093static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 3119static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
3094{ 3120{
3095 data->timestamp = jiffies; 3121 data->timestamp = jiffies;
3122 data->read_done_cb = nfs4_read_done_cb;
3096 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 3123 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
3097} 3124}
3098 3125
3099static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 3126/* Reset the the nfs_read_data to send the read to the MDS. */
3127void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3128{
3129 dprintk("%s Reset task for i/o through\n", __func__);
3130 put_lseg(data->lseg);
3131 data->lseg = NULL;
3132 /* offsets will differ in the dense stripe case */
3133 data->args.offset = data->mds_offset;
3134 data->ds_clp = NULL;
3135 data->args.fh = NFS_FH(data->inode);
3136 data->read_done_cb = nfs4_read_done_cb;
3137 task->tk_ops = data->mds_ops;
3138 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3139}
3140EXPORT_SYMBOL_GPL(nfs4_reset_read);
3141
3142static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3100{ 3143{
3101 struct inode *inode = data->inode; 3144 struct inode *inode = data->inode;
3102 3145
3103 if (!nfs4_sequence_done(task, &data->res.seq_res))
3104 return -EAGAIN;
3105
3106 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3146 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3107 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3147 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3108 return -EAGAIN; 3148 return -EAGAIN;
@@ -3114,11 +3154,41 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3114 return 0; 3154 return 0;
3115} 3155}
3116 3156
3157static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3158{
3159 if (!nfs4_sequence_done(task, &data->res.seq_res))
3160 return -EAGAIN;
3161 return data->write_done_cb(task, data);
3162}
3163
3164/* Reset the the nfs_write_data to send the write to the MDS. */
3165void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
3166{
3167 dprintk("%s Reset task for i/o through\n", __func__);
3168 put_lseg(data->lseg);
3169 data->lseg = NULL;
3170 data->ds_clp = NULL;
3171 data->write_done_cb = nfs4_write_done_cb;
3172 data->args.fh = NFS_FH(data->inode);
3173 data->args.bitmask = data->res.server->cache_consistency_bitmask;
3174 data->args.offset = data->mds_offset;
3175 data->res.fattr = &data->fattr;
3176 task->tk_ops = data->mds_ops;
3177 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3178}
3179EXPORT_SYMBOL_GPL(nfs4_reset_write);
3180
3117static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3181static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3118{ 3182{
3119 struct nfs_server *server = NFS_SERVER(data->inode); 3183 struct nfs_server *server = NFS_SERVER(data->inode);
3120 3184
3121 data->args.bitmask = server->cache_consistency_bitmask; 3185 if (data->lseg) {
3186 data->args.bitmask = NULL;
3187 data->res.fattr = NULL;
3188 } else
3189 data->args.bitmask = server->cache_consistency_bitmask;
3190 if (!data->write_done_cb)
3191 data->write_done_cb = nfs4_write_done_cb;
3122 data->res.server = server; 3192 data->res.server = server;
3123 data->timestamp = jiffies; 3193 data->timestamp = jiffies;
3124 3194
@@ -3178,7 +3248,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3178 if (task->tk_status < 0) { 3248 if (task->tk_status < 0) {
3179 /* Unless we're shutting down, schedule state recovery! */ 3249 /* Unless we're shutting down, schedule state recovery! */
3180 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3250 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3181 nfs4_schedule_state_recovery(clp); 3251 nfs4_schedule_lease_recovery(clp);
3182 return; 3252 return;
3183 } 3253 }
3184 do_renew_lease(clp, timestamp); 3254 do_renew_lease(clp, timestamp);
@@ -3252,6 +3322,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3322 }
3253} 3323}
3254 3324
3325static int buf_to_pages_noslab(const void *buf, size_t buflen,
3326 struct page **pages, unsigned int *pgbase)
3327{
3328 struct page *newpage, **spages;
3329 int rc = 0;
3330 size_t len;
3331 spages = pages;
3332
3333 do {
3334 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3335 newpage = alloc_page(GFP_KERNEL);
3336
3337 if (newpage == NULL)
3338 goto unwind;
3339 memcpy(page_address(newpage), buf, len);
3340 buf += len;
3341 buflen -= len;
3342 *pages++ = newpage;
3343 rc++;
3344 } while (buflen != 0);
3345
3346 return rc;
3347
3348unwind:
3349 for(; rc > 0; rc--)
3350 __free_page(spages[rc-1]);
3351 return -ENOMEM;
3352}
3353
3255struct nfs4_cached_acl { 3354struct nfs4_cached_acl {
3256 int cached; 3355 int cached;
3257 size_t len; 3356 size_t len;
@@ -3420,13 +3519,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3519 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3520 .rpc_resp = &res,
3422 }; 3521 };
3423 int ret; 3522 int ret, i;
3424 3523
3425 if (!nfs4_server_supports_acls(server)) 3524 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3525 return -EOPNOTSUPP;
3526 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3527 if (i < 0)
3528 return i;
3427 nfs_inode_return_delegation(inode); 3529 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3530 ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
3531
3532 /*
3533 * Free each page after tx, so the only ref left is
3534 * held by the network stack
3535 */
3536 for (; i > 0; i--)
3537 put_page(pages[i-1]);
3538
3430 /* 3539 /*
3431 * Acl update can result in inode attribute update. 3540 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3541 * so mark the attribute cache invalid.
@@ -3464,12 +3573,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3464 case -NFS4ERR_OPENMODE: 3573 case -NFS4ERR_OPENMODE:
3465 if (state == NULL) 3574 if (state == NULL)
3466 break; 3575 break;
3467 nfs4_state_mark_reclaim_nograce(clp, state); 3576 nfs4_schedule_stateid_recovery(server, state);
3468 goto do_state_recovery; 3577 goto wait_on_recovery;
3469 case -NFS4ERR_STALE_STATEID: 3578 case -NFS4ERR_STALE_STATEID:
3470 case -NFS4ERR_STALE_CLIENTID: 3579 case -NFS4ERR_STALE_CLIENTID:
3471 case -NFS4ERR_EXPIRED: 3580 case -NFS4ERR_EXPIRED:
3472 goto do_state_recovery; 3581 nfs4_schedule_lease_recovery(clp);
3582 goto wait_on_recovery;
3473#if defined(CONFIG_NFS_V4_1) 3583#if defined(CONFIG_NFS_V4_1)
3474 case -NFS4ERR_BADSESSION: 3584 case -NFS4ERR_BADSESSION:
3475 case -NFS4ERR_BADSLOT: 3585 case -NFS4ERR_BADSLOT:
@@ -3480,7 +3590,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3480 case -NFS4ERR_SEQ_MISORDERED: 3590 case -NFS4ERR_SEQ_MISORDERED:
3481 dprintk("%s ERROR %d, Reset session\n", __func__, 3591 dprintk("%s ERROR %d, Reset session\n", __func__,
3482 task->tk_status); 3592 task->tk_status);
3483 nfs4_schedule_state_recovery(clp); 3593 nfs4_schedule_session_recovery(clp->cl_session);
3484 task->tk_status = 0; 3594 task->tk_status = 0;
3485 return -EAGAIN; 3595 return -EAGAIN;
3486#endif /* CONFIG_NFS_V4_1 */ 3596#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3607,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3497 } 3607 }
3498 task->tk_status = nfs4_map_errors(task->tk_status); 3608 task->tk_status = nfs4_map_errors(task->tk_status);
3499 return 0; 3609 return 0;
3500do_state_recovery: 3610wait_on_recovery:
3501 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3611 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3502 nfs4_schedule_state_recovery(clp);
3503 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3612 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3504 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3613 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3505 task->tk_status = 0; 3614 task->tk_status = 0;
@@ -4110,7 +4219,7 @@ static void nfs4_lock_release(void *calldata)
4110 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4219 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4111 data->arg.lock_seqid); 4220 data->arg.lock_seqid);
4112 if (!IS_ERR(task)) 4221 if (!IS_ERR(task))
4113 rpc_put_task(task); 4222 rpc_put_task_async(task);
4114 dprintk("%s: cancelling lock!\n", __func__); 4223 dprintk("%s: cancelling lock!\n", __func__);
4115 } else 4224 } else
4116 nfs_free_seqid(data->arg.lock_seqid); 4225 nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4243,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4134 4243
4135static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4244static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4136{ 4245{
4137 struct nfs_client *clp = server->nfs_client;
4138 struct nfs4_state *state = lsp->ls_state;
4139
4140 switch (error) { 4246 switch (error) {
4141 case -NFS4ERR_ADMIN_REVOKED: 4247 case -NFS4ERR_ADMIN_REVOKED:
4142 case -NFS4ERR_BAD_STATEID: 4248 case -NFS4ERR_BAD_STATEID:
4143 case -NFS4ERR_EXPIRED: 4249 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4144 if (new_lock_owner != 0 || 4250 if (new_lock_owner != 0 ||
4145 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4251 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4146 nfs4_state_mark_reclaim_nograce(clp, state); 4252 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4147 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4148 break; 4253 break;
4149 case -NFS4ERR_STALE_STATEID: 4254 case -NFS4ERR_STALE_STATEID:
4150 if (new_lock_owner != 0 ||
4151 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4152 nfs4_state_mark_reclaim_reboot(clp, state);
4153 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4255 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4256 case -NFS4ERR_EXPIRED:
4257 nfs4_schedule_lease_recovery(server->nfs_client);
4154 }; 4258 };
4155} 4259}
4156 4260
@@ -4366,12 +4470,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4366 case -NFS4ERR_EXPIRED: 4470 case -NFS4ERR_EXPIRED:
4367 case -NFS4ERR_STALE_CLIENTID: 4471 case -NFS4ERR_STALE_CLIENTID:
4368 case -NFS4ERR_STALE_STATEID: 4472 case -NFS4ERR_STALE_STATEID:
4473 nfs4_schedule_lease_recovery(server->nfs_client);
4474 goto out;
4369 case -NFS4ERR_BADSESSION: 4475 case -NFS4ERR_BADSESSION:
4370 case -NFS4ERR_BADSLOT: 4476 case -NFS4ERR_BADSLOT:
4371 case -NFS4ERR_BAD_HIGH_SLOT: 4477 case -NFS4ERR_BAD_HIGH_SLOT:
4372 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4478 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4373 case -NFS4ERR_DEADSESSION: 4479 case -NFS4ERR_DEADSESSION:
4374 nfs4_schedule_state_recovery(server->nfs_client); 4480 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4375 goto out; 4481 goto out;
4376 case -ERESTARTSYS: 4482 case -ERESTARTSYS:
4377 /* 4483 /*
@@ -4381,7 +4487,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4381 case -NFS4ERR_ADMIN_REVOKED: 4487 case -NFS4ERR_ADMIN_REVOKED:
4382 case -NFS4ERR_BAD_STATEID: 4488 case -NFS4ERR_BAD_STATEID:
4383 case -NFS4ERR_OPENMODE: 4489 case -NFS4ERR_OPENMODE:
4384 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4490 nfs4_schedule_stateid_recovery(server, state);
4385 err = 0; 4491 err = 0;
4386 goto out; 4492 goto out;
4387 case -EKEYEXPIRED: 4493 case -EKEYEXPIRED:
@@ -4988,10 +5094,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
4988 int status; 5094 int status;
4989 unsigned *ptr; 5095 unsigned *ptr;
4990 struct nfs4_session *session = clp->cl_session; 5096 struct nfs4_session *session = clp->cl_session;
5097 long timeout = 0;
5098 int err;
4991 5099
4992 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5100 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
4993 5101
4994 status = _nfs4_proc_create_session(clp); 5102 do {
5103 status = _nfs4_proc_create_session(clp);
5104 if (status == -NFS4ERR_DELAY) {
5105 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5106 if (err)
5107 status = err;
5108 }
5109 } while (status == -NFS4ERR_DELAY);
5110
4995 if (status) 5111 if (status)
4996 goto out; 5112 goto out;
4997 5113
@@ -5073,6 +5189,27 @@ int nfs4_init_session(struct nfs_server *server)
5073 return ret; 5189 return ret;
5074} 5190}
5075 5191
5192int nfs4_init_ds_session(struct nfs_client *clp)
5193{
5194 struct nfs4_session *session = clp->cl_session;
5195 int ret;
5196
5197 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
5198 return 0;
5199
5200 ret = nfs4_client_recover_expired_lease(clp);
5201 if (!ret)
5202 /* Test for the DS role */
5203 if (!is_ds_client(clp))
5204 ret = -ENODEV;
5205 if (!ret)
5206 ret = nfs4_check_client_ready(clp);
5207 return ret;
5208
5209}
5210EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
5211
5212
5076/* 5213/*
5077 * Renew the cl_session lease. 5214 * Renew the cl_session lease.
5078 */ 5215 */
@@ -5100,7 +5237,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5100 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5237 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5101 return -EAGAIN; 5238 return -EAGAIN;
5102 default: 5239 default:
5103 nfs4_schedule_state_recovery(clp); 5240 nfs4_schedule_lease_recovery(clp);
5104 } 5241 }
5105 return 0; 5242 return 0;
5106} 5243}
@@ -5187,7 +5324,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5187 if (IS_ERR(task)) 5324 if (IS_ERR(task))
5188 ret = PTR_ERR(task); 5325 ret = PTR_ERR(task);
5189 else 5326 else
5190 rpc_put_task(task); 5327 rpc_put_task_async(task);
5191 dprintk("<-- %s status=%d\n", __func__, ret); 5328 dprintk("<-- %s status=%d\n", __func__, ret);
5192 return ret; 5329 return ret;
5193} 5330}
@@ -5203,8 +5340,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5203 goto out; 5340 goto out;
5204 } 5341 }
5205 ret = rpc_wait_for_completion_task(task); 5342 ret = rpc_wait_for_completion_task(task);
5206 if (!ret) 5343 if (!ret) {
5344 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5345
5346 if (task->tk_status == 0)
5347 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5207 ret = task->tk_status; 5348 ret = task->tk_status;
5349 }
5208 rpc_put_task(task); 5350 rpc_put_task(task);
5209out: 5351out:
5210 dprintk("<-- %s status=%d\n", __func__, ret); 5352 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5383,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5241 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5383 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5242 return -EAGAIN; 5384 return -EAGAIN;
5243 default: 5385 default:
5244 nfs4_schedule_state_recovery(clp); 5386 nfs4_schedule_lease_recovery(clp);
5245 } 5387 }
5246 return 0; 5388 return 0;
5247} 5389}
@@ -5309,6 +5451,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5309 status = PTR_ERR(task); 5451 status = PTR_ERR(task);
5310 goto out; 5452 goto out;
5311 } 5453 }
5454 status = nfs4_wait_for_completion_rpc_task(task);
5455 if (status == 0)
5456 status = task->tk_status;
5312 rpc_put_task(task); 5457 rpc_put_task(task);
5313 return 0; 5458 return 0;
5314out: 5459out:
@@ -5595,6 +5740,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5595 .clear_acl_cache = nfs4_zap_acl_attr, 5740 .clear_acl_cache = nfs4_zap_acl_attr,
5596 .close_context = nfs4_close_context, 5741 .close_context = nfs4_close_context,
5597 .open_context = nfs4_atomic_open, 5742 .open_context = nfs4_atomic_open,
5743 .init_client = nfs4_init_client,
5598}; 5744};
5599 5745
5600static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 5746static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 402143d75fc5..df8e7f3ca56d 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -64,12 +64,8 @@ nfs4_renew_state(struct work_struct *work)
64 ops = clp->cl_mvops->state_renewal_ops; 64 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 65 dprintk("%s: start\n", __func__);
66 66
67 rcu_read_lock(); 67 if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
68 if (list_empty(&clp->cl_superblocks)) {
69 rcu_read_unlock();
70 goto out; 68 goto out;
71 }
72 rcu_read_unlock();
73 69
74 spin_lock(&clp->cl_lock); 70 spin_lock(&clp->cl_lock);
75 lease = clp->cl_lease_time; 71 lease = clp->cl_lease_time;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..ab1bf5bb021f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -153,6 +153,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
153 int status; 153 int status;
154 struct nfs_fsinfo fsinfo; 154 struct nfs_fsinfo fsinfo;
155 155
156 if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
157 nfs4_schedule_state_renewal(clp);
158 return 0;
159 }
160
156 status = nfs4_proc_get_lease_time(clp, &fsinfo); 161 status = nfs4_proc_get_lease_time(clp, &fsinfo);
157 if (status == 0) { 162 if (status == 0) {
158 /* Update lease time and schedule renewal */ 163 /* Update lease time and schedule renewal */
@@ -1007,9 +1012,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1012}
1008 1013
1009/* 1014/*
1010 * Schedule a state recovery attempt 1015 * Schedule a lease recovery attempt
1011 */ 1016 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1017void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1018{
1014 if (!clp) 1019 if (!clp)
1015 return; 1020 return;
@@ -1018,7 +1023,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1023 nfs4_schedule_state_manager(clp);
1019} 1024}
1020 1025
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1026static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1027{
1023 1028
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1029 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1037,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1037 return 1;
1033} 1038}
1034 1039
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1040static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1041{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1042 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1043 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1046,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1046 return 1;
1042} 1047}
1043 1048
1049void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1050{
1051 struct nfs_client *clp = server->nfs_client;
1052
1053 nfs4_state_mark_reclaim_nograce(clp, state);
1054 nfs4_schedule_state_manager(clp);
1055}
1056
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1057static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1058{
1046 struct inode *inode = state->inode; 1059 struct inode *inode = state->inode;
@@ -1436,10 +1449,16 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1449}
1437 1450
1438#ifdef CONFIG_NFS_V4_1 1451#ifdef CONFIG_NFS_V4_1
1452void nfs4_schedule_session_recovery(struct nfs4_session *session)
1453{
1454 nfs4_schedule_lease_recovery(session->clp);
1455}
1456EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1457
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1458void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1459{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1460 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1461 nfs4_schedule_state_manager(clp);
1443} 1462}
1444 1463
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1464static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1466,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1466 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1467 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1468 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1469 nfs4_schedule_state_manager(clp);
1451 } 1470 }
1452} 1471}
1453 1472
@@ -1455,7 +1474,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1474{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1475 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1476 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1477 nfs4_schedule_state_manager(clp);
1459 } 1478 }
1460} 1479}
1461 1480
@@ -1475,7 +1494,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1494{
1476 nfs_expire_all_delegations(clp); 1495 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1496 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1497 nfs4_schedule_state_manager(clp);
1479} 1498}
1480 1499
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1500void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..0cf560f77884 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -844,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
844 if (iap->ia_valid & ATTR_MODE) 844 if (iap->ia_valid & ATTR_MODE)
845 len += 4; 845 len += 4;
846 if (iap->ia_valid & ATTR_UID) { 846 if (iap->ia_valid & ATTR_UID) {
847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); 847 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
848 if (owner_namelen < 0) { 848 if (owner_namelen < 0) {
849 dprintk("nfs: couldn't resolve uid %d to string\n", 849 dprintk("nfs: couldn't resolve uid %d to string\n",
850 iap->ia_uid); 850 iap->ia_uid);
@@ -856,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
857 } 857 }
858 if (iap->ia_valid & ATTR_GID) { 858 if (iap->ia_valid & ATTR_GID) {
859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); 859 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
860 if (owner_grouplen < 0) { 860 if (owner_grouplen < 0) {
861 dprintk("nfs: couldn't resolve gid %d to string\n", 861 dprintk("nfs: couldn't resolve gid %d to string\n",
862 iap->ia_gid); 862 iap->ia_gid);
@@ -1384,7 +1384,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1384 hdr->replen += decode_putrootfh_maxsz; 1384 hdr->replen += decode_putrootfh_maxsz;
1385} 1385}
1386 1386
1387static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx) 1387static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
1388{ 1388{
1389 nfs4_stateid stateid; 1389 nfs4_stateid stateid;
1390 __be32 *p; 1390 __be32 *p;
@@ -1392,6 +1392,8 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1392 p = reserve_space(xdr, NFS4_STATEID_SIZE); 1392 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1393 if (ctx->state != NULL) { 1393 if (ctx->state != NULL) {
1394 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); 1394 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
1395 if (zero_seqid)
1396 stateid.stateid.seqid = 0;
1395 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1397 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1396 } else 1398 } else
1397 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1399 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1404,7 +1406,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1404 p = reserve_space(xdr, 4); 1406 p = reserve_space(xdr, 4);
1405 *p = cpu_to_be32(OP_READ); 1407 *p = cpu_to_be32(OP_READ);
1406 1408
1407 encode_stateid(xdr, args->context, args->lock_context); 1409 encode_stateid(xdr, args->context, args->lock_context,
1410 hdr->minorversion);
1408 1411
1409 p = reserve_space(xdr, 12); 1412 p = reserve_space(xdr, 12);
1410 p = xdr_encode_hyper(p, args->offset); 1413 p = xdr_encode_hyper(p, args->offset);
@@ -1592,7 +1595,8 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1592 p = reserve_space(xdr, 4); 1595 p = reserve_space(xdr, 4);
1593 *p = cpu_to_be32(OP_WRITE); 1596 *p = cpu_to_be32(OP_WRITE);
1594 1597
1595 encode_stateid(xdr, args->context, args->lock_context); 1598 encode_stateid(xdr, args->context, args->lock_context,
1599 hdr->minorversion);
1596 1600
1597 p = reserve_space(xdr, 16); 1601 p = reserve_space(xdr, 16);
1598 p = xdr_encode_hyper(p, args->offset); 1602 p = xdr_encode_hyper(p, args->offset);
@@ -1660,7 +1664,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1664
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1665 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1666 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1667 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1668 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1669 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1670
@@ -2271,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2271 encode_putfh(xdr, args->fh, &hdr); 2275 encode_putfh(xdr, args->fh, &hdr);
2272 encode_write(xdr, args, &hdr); 2276 encode_write(xdr, args, &hdr);
2273 req->rq_snd_buf.flags |= XDRBUF_WRITE; 2277 req->rq_snd_buf.flags |= XDRBUF_WRITE;
2274 encode_getfattr(xdr, args->bitmask, &hdr); 2278 if (args->bitmask)
2279 encode_getfattr(xdr, args->bitmask, &hdr);
2275 encode_nops(&hdr); 2280 encode_nops(&hdr);
2276} 2281}
2277 2282
@@ -3382,7 +3387,7 @@ out_overflow:
3382} 3387}
3383 3388
3384static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3389static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3385 struct nfs_client *clp, uint32_t *uid, int may_sleep) 3390 const struct nfs_server *server, uint32_t *uid, int may_sleep)
3386{ 3391{
3387 uint32_t len; 3392 uint32_t len;
3388 __be32 *p; 3393 __be32 *p;
@@ -3402,7 +3407,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3402 if (!may_sleep) { 3407 if (!may_sleep) {
3403 /* do nothing */ 3408 /* do nothing */
3404 } else if (len < XDR_MAX_NETOBJ) { 3409 } else if (len < XDR_MAX_NETOBJ) {
3405 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) 3410 if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
3406 ret = NFS_ATTR_FATTR_OWNER; 3411 ret = NFS_ATTR_FATTR_OWNER;
3407 else 3412 else
3408 dprintk("%s: nfs_map_name_to_uid failed!\n", 3413 dprintk("%s: nfs_map_name_to_uid failed!\n",
@@ -3420,7 +3425,7 @@ out_overflow:
3420} 3425}
3421 3426
3422static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3427static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3423 struct nfs_client *clp, uint32_t *gid, int may_sleep) 3428 const struct nfs_server *server, uint32_t *gid, int may_sleep)
3424{ 3429{
3425 uint32_t len; 3430 uint32_t len;
3426 __be32 *p; 3431 __be32 *p;
@@ -3440,7 +3445,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3440 if (!may_sleep) { 3445 if (!may_sleep) {
3441 /* do nothing */ 3446 /* do nothing */
3442 } else if (len < XDR_MAX_NETOBJ) { 3447 } else if (len < XDR_MAX_NETOBJ) {
3443 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) 3448 if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
3444 ret = NFS_ATTR_FATTR_GROUP; 3449 ret = NFS_ATTR_FATTR_GROUP;
3445 else 3450 else
3446 dprintk("%s: nfs_map_group_to_gid failed!\n", 3451 dprintk("%s: nfs_map_group_to_gid failed!\n",
@@ -3939,14 +3944,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3939 goto xdr_error; 3944 goto xdr_error;
3940 fattr->valid |= status; 3945 fattr->valid |= status;
3941 3946
3942 status = decode_attr_owner(xdr, bitmap, server->nfs_client, 3947 status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
3943 &fattr->uid, may_sleep);
3944 if (status < 0) 3948 if (status < 0)
3945 goto xdr_error; 3949 goto xdr_error;
3946 fattr->valid |= status; 3950 fattr->valid |= status;
3947 3951
3948 status = decode_attr_group(xdr, bitmap, server->nfs_client, 3952 status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
3949 &fattr->gid, may_sleep);
3950 if (status < 0) 3953 if (status < 0)
3951 goto xdr_error; 3954 goto xdr_error;
3952 fattr->valid |= status; 3955 fattr->valid |= status;
@@ -4694,7 +4697,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4697 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4698 if (unlikely(!p))
4696 goto out_overflow; 4699 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4700 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4701 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4702 if (unlikely(!p))
4700 goto out_overflow; 4703 goto out_overflow;
@@ -5690,8 +5693,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5690 status = decode_write(xdr, res); 5693 status = decode_write(xdr, res);
5691 if (status) 5694 if (status)
5692 goto out; 5695 goto out;
5693 decode_getfattr(xdr, res->fattr, res->server, 5696 if (res->fattr)
5694 !RPC_IS_ASYNC(rqstp->rq_task)); 5697 decode_getfattr(xdr, res->fattr, res->server,
5698 !RPC_IS_ASYNC(rqstp->rq_task));
5695 if (!status) 5699 if (!status)
5696 status = res->count; 5700 status = res->count;
5697out: 5701out:
@@ -6167,8 +6171,6 @@ static struct {
6167 { NFS4ERR_DQUOT, -EDQUOT }, 6171 { NFS4ERR_DQUOT, -EDQUOT },
6168 { NFS4ERR_STALE, -ESTALE }, 6172 { NFS4ERR_STALE, -ESTALE },
6169 { NFS4ERR_BADHANDLE, -EBADHANDLE }, 6173 { NFS4ERR_BADHANDLE, -EBADHANDLE },
6170 { NFS4ERR_BADOWNER, -EINVAL },
6171 { NFS4ERR_BADNAME, -EINVAL },
6172 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, 6174 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
6173 { NFS4ERR_NOTSUPP, -ENOTSUPP }, 6175 { NFS4ERR_NOTSUPP, -ENOTSUPP },
6174 { NFS4ERR_TOOSMALL, -ETOOSMALL }, 6176 { NFS4ERR_TOOSMALL, -ETOOSMALL },
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e1164e3f9e69..23e794410669 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,6 +20,7 @@
20#include <linux/nfs_mount.h> 20#include <linux/nfs_mount.h>
21 21
22#include "internal.h" 22#include "internal.h"
23#include "pnfs.h"
23 24
24static struct kmem_cache *nfs_page_cachep; 25static struct kmem_cache *nfs_page_cachep;
25 26
@@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
213 */ 214 */
214void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 215void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 struct inode *inode, 216 struct inode *inode,
216 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), 217 int (*doio)(struct nfs_pageio_descriptor *),
217 size_t bsize, 218 size_t bsize,
218 int io_flags) 219 int io_flags)
219{ 220{
@@ -226,6 +227,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
226 desc->pg_doio = doio; 227 desc->pg_doio = doio;
227 desc->pg_ioflags = io_flags; 228 desc->pg_ioflags = io_flags;
228 desc->pg_error = 0; 229 desc->pg_error = 0;
230 desc->pg_lseg = NULL;
229} 231}
230 232
231/** 233/**
@@ -240,7 +242,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
240 * Return 'true' if this is the case, else return 'false'. 242 * Return 'true' if this is the case, else return 'false'.
241 */ 243 */
242static int nfs_can_coalesce_requests(struct nfs_page *prev, 244static int nfs_can_coalesce_requests(struct nfs_page *prev,
243 struct nfs_page *req) 245 struct nfs_page *req,
246 struct nfs_pageio_descriptor *pgio)
244{ 247{
245 if (req->wb_context->cred != prev->wb_context->cred) 248 if (req->wb_context->cred != prev->wb_context->cred)
246 return 0; 249 return 0;
@@ -254,6 +257,12 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
254 return 0; 257 return 0;
255 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 258 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
256 return 0; 259 return 0;
260 /*
261 * Non-whole file layouts need to check that req is inside of
262 * pgio->pg_lseg.
263 */
264 if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
265 return 0;
257 return 1; 266 return 1;
258} 267}
259 268
@@ -286,7 +295,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
286 if (newlen > desc->pg_bsize) 295 if (newlen > desc->pg_bsize)
287 return 0; 296 return 0;
288 prev = nfs_list_entry(desc->pg_list.prev); 297 prev = nfs_list_entry(desc->pg_list.prev);
289 if (!nfs_can_coalesce_requests(prev, req)) 298 if (!nfs_can_coalesce_requests(prev, req, desc))
290 return 0; 299 return 0;
291 } else 300 } else
292 desc->pg_base = req->wb_pgbase; 301 desc->pg_base = req->wb_pgbase;
@@ -302,12 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
302static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 311static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
303{ 312{
304 if (!list_empty(&desc->pg_list)) { 313 if (!list_empty(&desc->pg_list)) {
305 int error = desc->pg_doio(desc->pg_inode, 314 int error = desc->pg_doio(desc);
306 &desc->pg_list,
307 nfs_page_array_len(desc->pg_base,
308 desc->pg_count),
309 desc->pg_count,
310 desc->pg_ioflags);
311 if (error < 0) 315 if (error < 0)
312 desc->pg_error = error; 316 desc->pg_error = error;
313 else 317 else
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1b1bc1a0fb0a..f38813a0a295 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -30,6 +30,7 @@
30#include <linux/nfs_fs.h> 30#include <linux/nfs_fs.h>
31#include "internal.h" 31#include "internal.h"
32#include "pnfs.h" 32#include "pnfs.h"
33#include "iostat.h"
33 34
34#define NFSDBG_FACILITY NFSDBG_PNFS 35#define NFSDBG_FACILITY NFSDBG_PNFS
35 36
@@ -74,10 +75,8 @@ find_pnfs_driver(u32 id)
74void 75void
75unset_pnfs_layoutdriver(struct nfs_server *nfss) 76unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{ 77{
77 if (nfss->pnfs_curr_ld) { 78 if (nfss->pnfs_curr_ld)
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner); 79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL; 80 nfss->pnfs_curr_ld = NULL;
82} 81}
83 82
@@ -115,13 +114,7 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
115 goto out_no_driver; 114 goto out_no_driver;
116 } 115 }
117 server->pnfs_curr_ld = ld_type; 116 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) { 117
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id); 118 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return; 119 return;
127 120
@@ -230,37 +223,41 @@ static void free_lseg(struct pnfs_layout_segment *lseg)
230 put_layout_hdr(NFS_I(ino)->layout); 223 put_layout_hdr(NFS_I(ino)->layout);
231} 224}
232 225
233/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg 226static void
234 * could sleep, so must be called outside of the lock. 227put_lseg_common(struct pnfs_layout_segment *lseg)
235 * Returns 1 if object was removed, otherwise return 0. 228{
236 */ 229 struct inode *inode = lseg->pls_layout->plh_inode;
237static int 230
238put_lseg_locked(struct pnfs_layout_segment *lseg, 231 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
239 struct list_head *tmp_list) 232 list_del_init(&lseg->pls_list);
233 if (list_empty(&lseg->pls_layout->plh_segs)) {
234 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
235 /* Matched by initial refcount set in alloc_init_layout_hdr */
236 put_layout_hdr_locked(lseg->pls_layout);
237 }
238 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
239}
240
241void
242put_lseg(struct pnfs_layout_segment *lseg)
240{ 243{
244 struct inode *inode;
245
246 if (!lseg)
247 return;
248
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 249 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
242 atomic_read(&lseg->pls_refcount), 250 atomic_read(&lseg->pls_refcount),
243 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 251 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
244 if (atomic_dec_and_test(&lseg->pls_refcount)) { 252 inode = lseg->pls_layout->plh_inode;
245 struct inode *ino = lseg->pls_layout->plh_inode; 253 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
254 LIST_HEAD(free_me);
246 255
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 256 put_lseg_common(lseg);
248 list_del(&lseg->pls_list); 257 list_add(&lseg->pls_list, &free_me);
249 if (list_empty(&lseg->pls_layout->plh_segs)) { 258 spin_unlock(&inode->i_lock);
250 struct nfs_client *clp; 259 pnfs_free_lseg_list(&free_me);
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
258 }
259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
260 list_add(&lseg->pls_list, tmp_list);
261 return 1;
262 } 260 }
263 return 0;
264} 261}
265 262
266static bool 263static bool
@@ -281,7 +278,13 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
281 * list. It will now be removed when all 278 * list. It will now be removed when all
282 * outstanding io is finished. 279 * outstanding io is finished.
283 */ 280 */
284 rv = put_lseg_locked(lseg, tmp_list); 281 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
282 atomic_read(&lseg->pls_refcount));
283 if (atomic_dec_and_test(&lseg->pls_refcount)) {
284 put_lseg_common(lseg);
285 list_add(&lseg->pls_list, tmp_list);
286 rv = 1;
287 }
285 } 288 }
286 return rv; 289 return rv;
287} 290}
@@ -299,6 +302,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
299 302
300 dprintk("%s:Begin lo %p\n", __func__, lo); 303 dprintk("%s:Begin lo %p\n", __func__, lo);
301 304
305 if (list_empty(&lo->plh_segs)) {
306 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
307 put_layout_hdr_locked(lo);
308 return 0;
309 }
302 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 310 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
303 if (should_free_lseg(lseg->pls_range.iomode, iomode)) { 311 if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
304 dprintk("%s: freeing lseg %p iomode %d " 312 dprintk("%s: freeing lseg %p iomode %d "
@@ -312,11 +320,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
312 return invalid - removed; 320 return invalid - removed;
313} 321}
314 322
323/* note free_me must contain lsegs from a single layout_hdr */
315void 324void
316pnfs_free_lseg_list(struct list_head *free_me) 325pnfs_free_lseg_list(struct list_head *free_me)
317{ 326{
318 struct pnfs_layout_segment *lseg, *tmp; 327 struct pnfs_layout_segment *lseg, *tmp;
328 struct pnfs_layout_hdr *lo;
329
330 if (list_empty(free_me))
331 return;
319 332
333 lo = list_first_entry(free_me, struct pnfs_layout_segment,
334 pls_list)->pls_layout;
335
336 if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
337 struct nfs_client *clp;
338
339 clp = NFS_SERVER(lo->plh_inode)->nfs_client;
340 spin_lock(&clp->cl_lock);
341 list_del_init(&lo->plh_layouts);
342 spin_unlock(&clp->cl_lock);
343 }
320 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 344 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
321 list_del(&lseg->pls_list); 345 list_del(&lseg->pls_list);
322 free_lseg(lseg); 346 free_lseg(lseg);
@@ -332,10 +356,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
332 spin_lock(&nfsi->vfs_inode.i_lock); 356 spin_lock(&nfsi->vfs_inode.i_lock);
333 lo = nfsi->layout; 357 lo = nfsi->layout;
334 if (lo) { 358 if (lo) {
335 set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags); 359 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
336 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); 360 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
338 put_layout_hdr_locked(lo);
339 } 361 }
340 spin_unlock(&nfsi->vfs_inode.i_lock); 362 spin_unlock(&nfsi->vfs_inode.i_lock);
341 pnfs_free_lseg_list(&tmp_list); 363 pnfs_free_lseg_list(&tmp_list);
@@ -403,6 +425,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) 425 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
404 return true; 426 return true;
405 return lo->plh_block_lgets || 427 return lo->plh_block_lgets ||
428 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 429 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
407 (list_empty(&lo->plh_segs) && 430 (list_empty(&lo->plh_segs) &&
408 (atomic_read(&lo->plh_outstanding) > lget)); 431 (atomic_read(&lo->plh_outstanding) > lget));
@@ -674,7 +697,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
674 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 697 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
675 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 698 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
676 is_matching_lseg(lseg, iomode)) { 699 is_matching_lseg(lseg, iomode)) {
677 ret = lseg; 700 ret = get_lseg(lseg);
678 break; 701 break;
679 } 702 }
680 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) 703 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
@@ -699,6 +722,7 @@ pnfs_update_layout(struct inode *ino,
699 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 722 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
700 struct pnfs_layout_hdr *lo; 723 struct pnfs_layout_hdr *lo;
701 struct pnfs_layout_segment *lseg = NULL; 724 struct pnfs_layout_segment *lseg = NULL;
725 bool first = false;
702 726
703 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 727 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
704 return NULL; 728 return NULL;
@@ -715,21 +739,25 @@ pnfs_update_layout(struct inode *ino,
715 dprintk("%s matches recall, use MDS\n", __func__); 739 dprintk("%s matches recall, use MDS\n", __func__);
716 goto out_unlock; 740 goto out_unlock;
717 } 741 }
718 /* Check to see if the layout for the given range already exists */
719 lseg = pnfs_find_lseg(lo, iomode);
720 if (lseg)
721 goto out_unlock;
722 742
723 /* if LAYOUTGET already failed once we don't try again */ 743 /* if LAYOUTGET already failed once we don't try again */
724 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) 744 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
725 goto out_unlock; 745 goto out_unlock;
726 746
747 /* Check to see if the layout for the given range already exists */
748 lseg = pnfs_find_lseg(lo, iomode);
749 if (lseg)
750 goto out_unlock;
751
727 if (pnfs_layoutgets_blocked(lo, NULL, 0)) 752 if (pnfs_layoutgets_blocked(lo, NULL, 0))
728 goto out_unlock; 753 goto out_unlock;
729 atomic_inc(&lo->plh_outstanding); 754 atomic_inc(&lo->plh_outstanding);
730 755
731 get_layout_hdr(lo); 756 get_layout_hdr(lo);
732 if (list_empty(&lo->plh_segs)) { 757 if (list_empty(&lo->plh_segs))
758 first = true;
759 spin_unlock(&ino->i_lock);
760 if (first) {
733 /* The lo must be on the clp list if there is any 761 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in. 762 * chance of a CB_LAYOUTRECALL(FILE) coming in.
735 */ 763 */
@@ -738,24 +766,18 @@ pnfs_update_layout(struct inode *ino,
738 list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 766 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
739 spin_unlock(&clp->cl_lock); 767 spin_unlock(&clp->cl_lock);
740 } 768 }
741 spin_unlock(&ino->i_lock);
742 769
743 lseg = send_layoutget(lo, ctx, iomode); 770 lseg = send_layoutget(lo, ctx, iomode);
744 if (!lseg) { 771 if (!lseg && first) {
745 spin_lock(&ino->i_lock); 772 spin_lock(&clp->cl_lock);
746 if (list_empty(&lo->plh_segs)) { 773 list_del_init(&lo->plh_layouts);
747 spin_lock(&clp->cl_lock); 774 spin_unlock(&clp->cl_lock);
748 list_del_init(&lo->plh_layouts);
749 spin_unlock(&clp->cl_lock);
750 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
751 }
752 spin_unlock(&ino->i_lock);
753 } 775 }
754 atomic_dec(&lo->plh_outstanding); 776 atomic_dec(&lo->plh_outstanding);
755 put_layout_hdr(lo); 777 put_layout_hdr(lo);
756out: 778out:
757 dprintk("%s end, state 0x%lx lseg %p\n", __func__, 779 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
758 nfsi->layout->plh_flags, lseg); 780 nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
759 return lseg; 781 return lseg;
760out_unlock: 782out_unlock:
761 spin_unlock(&ino->i_lock); 783 spin_unlock(&ino->i_lock);
@@ -808,7 +830,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
808 } 830 }
809 init_lseg(lo, lseg); 831 init_lseg(lo, lseg);
810 lseg->pls_range = res->range; 832 lseg->pls_range = res->range;
811 *lgp->lsegpp = lseg; 833 *lgp->lsegpp = get_lseg(lseg);
812 pnfs_insert_layout(lo, lseg); 834 pnfs_insert_layout(lo, lseg);
813 835
814 if (res->return_on_close) { 836 if (res->return_on_close) {
@@ -829,137 +851,97 @@ out_forget_reply:
829 goto out; 851 goto out;
830} 852}
831 853
832/* 854static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
833 * Device ID cache. Currently supports one layout type per struct nfs_client. 855 struct nfs_page *prev,
834 * Add layout type to the lookup key to expand to support multiple types. 856 struct nfs_page *req)
835 */
836int
837pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
838 void (*free_callback)(struct pnfs_deviceid_node *))
839{ 857{
840 struct pnfs_deviceid_cache *c; 858 if (pgio->pg_count == prev->wb_bytes) {
841 859 /* This is first coelesce call for a series of nfs_pages */
842 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); 860 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
843 if (!c) 861 prev->wb_context,
844 return -ENOMEM; 862 IOMODE_READ);
845 spin_lock(&clp->cl_lock);
846 if (clp->cl_devid_cache != NULL) {
847 atomic_inc(&clp->cl_devid_cache->dc_ref);
848 dprintk("%s [kref [%d]]\n", __func__,
849 atomic_read(&clp->cl_devid_cache->dc_ref));
850 kfree(c);
851 } else {
852 /* kzalloc initializes hlists */
853 spin_lock_init(&c->dc_lock);
854 atomic_set(&c->dc_ref, 1);
855 c->dc_free_callback = free_callback;
856 clp->cl_devid_cache = c;
857 dprintk("%s [new]\n", __func__);
858 } 863 }
859 spin_unlock(&clp->cl_lock); 864 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
860 return 0;
861} 865}
862EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
863 866
864/*
865 * Called from pnfs_layoutdriver_type->free_lseg
866 * last layout segment reference frees deviceid
867 */
868void 867void
869pnfs_put_deviceid(struct pnfs_deviceid_cache *c, 868pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
870 struct pnfs_deviceid_node *devid)
871{ 869{
872 struct nfs4_deviceid *id = &devid->de_id; 870 struct pnfs_layoutdriver_type *ld;
873 struct pnfs_deviceid_node *d;
874 struct hlist_node *n;
875 long h = nfs4_deviceid_hash(id);
876 871
877 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); 872 ld = NFS_SERVER(inode)->pnfs_curr_ld;
878 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) 873 pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
879 return; 874}
880 875
881 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) 876static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
882 if (!memcmp(&d->de_id, id, sizeof(*id))) { 877 struct nfs_page *prev,
883 hlist_del_rcu(&d->de_node); 878 struct nfs_page *req)
884 spin_unlock(&c->dc_lock); 879{
885 synchronize_rcu(); 880 if (pgio->pg_count == prev->wb_bytes) {
886 c->dc_free_callback(devid); 881 /* This is first coelesce call for a series of nfs_pages */
887 return; 882 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
888 } 883 prev->wb_context,
889 spin_unlock(&c->dc_lock); 884 IOMODE_RW);
890 /* Why wasn't it found in the list? */
891 BUG();
892}
893EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
894
895/* Find and reference a deviceid */
896struct pnfs_deviceid_node *
897pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
898{
899 struct pnfs_deviceid_node *d;
900 struct hlist_node *n;
901 long hash = nfs4_deviceid_hash(id);
902
903 dprintk("--> %s hash %ld\n", __func__, hash);
904 rcu_read_lock();
905 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
906 if (!memcmp(&d->de_id, id, sizeof(*id))) {
907 if (!atomic_inc_not_zero(&d->de_ref)) {
908 goto fail;
909 } else {
910 rcu_read_unlock();
911 return d;
912 }
913 }
914 } 885 }
915fail: 886 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
916 rcu_read_unlock(); 887}
917 return NULL; 888
889void
890pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
891{
892 struct pnfs_layoutdriver_type *ld;
893
894 ld = NFS_SERVER(inode)->pnfs_curr_ld;
895 pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
896}
897
898enum pnfs_try_status
899pnfs_try_to_write_data(struct nfs_write_data *wdata,
900 const struct rpc_call_ops *call_ops, int how)
901{
902 struct inode *inode = wdata->inode;
903 enum pnfs_try_status trypnfs;
904 struct nfs_server *nfss = NFS_SERVER(inode);
905
906 wdata->mds_ops = call_ops;
907
908 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
909 inode->i_ino, wdata->args.count, wdata->args.offset, how);
910
911 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
912 if (trypnfs == PNFS_NOT_ATTEMPTED) {
913 put_lseg(wdata->lseg);
914 wdata->lseg = NULL;
915 } else
916 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
917
918 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
919 return trypnfs;
918} 920}
919EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
920 921
921/* 922/*
922 * Add a deviceid to the cache. 923 * Call the appropriate parallel I/O subsystem read function.
923 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
924 */ 924 */
925struct pnfs_deviceid_node * 925enum pnfs_try_status
926pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) 926pnfs_try_to_read_data(struct nfs_read_data *rdata,
927{ 927 const struct rpc_call_ops *call_ops)
928 struct pnfs_deviceid_node *d;
929 long hash = nfs4_deviceid_hash(&new->de_id);
930
931 dprintk("--> %s hash %ld\n", __func__, hash);
932 spin_lock(&c->dc_lock);
933 d = pnfs_find_get_deviceid(c, &new->de_id);
934 if (d) {
935 spin_unlock(&c->dc_lock);
936 dprintk("%s [discard]\n", __func__);
937 c->dc_free_callback(new);
938 return d;
939 }
940 INIT_HLIST_NODE(&new->de_node);
941 atomic_set(&new->de_ref, 1);
942 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
943 spin_unlock(&c->dc_lock);
944 dprintk("%s [new]\n", __func__);
945 return new;
946}
947EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
948
949void
950pnfs_put_deviceid_cache(struct nfs_client *clp)
951{ 928{
952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache; 929 struct inode *inode = rdata->inode;
930 struct nfs_server *nfss = NFS_SERVER(inode);
931 enum pnfs_try_status trypnfs;
953 932
954 dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); 933 rdata->mds_ops = call_ops;
955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { 934
956 int i; 935 dprintk("%s: Reading ino:%lu %u@%llu\n",
957 /* Verify cache is empty */ 936 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
958 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) 937
959 BUG_ON(!hlist_empty(&local->dc_deviceids[i])); 938 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
960 clp->cl_devid_cache = NULL; 939 if (trypnfs == PNFS_NOT_ATTEMPTED) {
961 spin_unlock(&clp->cl_lock); 940 put_lseg(rdata->lseg);
962 kfree(local); 941 rdata->lseg = NULL;
942 } else {
943 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
963 } 944 }
945 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
946 return trypnfs;
964} 947}
965EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e2612ea0cbed..6380b9405bcd 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,6 +30,8 @@
30#ifndef FS_NFS_PNFS_H 30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H 31#define FS_NFS_PNFS_H
32 32
33#include <linux/nfs_page.h>
34
33enum { 35enum {
34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 36 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
35 NFS_LSEG_ROC, /* roc bit received from server */ 37 NFS_LSEG_ROC, /* roc bit received from server */
@@ -43,6 +45,11 @@ struct pnfs_layout_segment {
43 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
44}; 46};
45 47
48enum pnfs_try_status {
49 PNFS_ATTEMPTED = 0,
50 PNFS_NOT_ATTEMPTED = 1,
51};
52
46#ifdef CONFIG_NFS_V4_1 53#ifdef CONFIG_NFS_V4_1
47 54
48#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" 55#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -61,10 +68,18 @@ struct pnfs_layoutdriver_type {
61 const u32 id; 68 const u32 id;
62 const char *name; 69 const char *name;
63 struct module *owner; 70 struct module *owner;
64 int (*set_layoutdriver) (struct nfs_server *);
65 int (*clear_layoutdriver) (struct nfs_server *);
66 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); 71 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
67 void (*free_lseg) (struct pnfs_layout_segment *lseg); 72 void (*free_lseg) (struct pnfs_layout_segment *lseg);
73
74 /* test for nfs page cache coalescing */
75 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
76
77 /*
78 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
79 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
80 */
81 enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
82 enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
68}; 83};
69 84
70struct pnfs_layout_hdr { 85struct pnfs_layout_hdr {
@@ -90,52 +105,6 @@ struct pnfs_device {
90 unsigned int pglen; 105 unsigned int pglen;
91}; 106};
92 107
93/*
94 * Device ID RCU cache. A device ID is unique per client ID and layout type.
95 */
96#define NFS4_DEVICE_ID_HASH_BITS 5
97#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
98#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
99
100static inline u32
101nfs4_deviceid_hash(struct nfs4_deviceid *id)
102{
103 unsigned char *cptr = (unsigned char *)id->data;
104 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
105 u32 x = 0;
106
107 while (nbytes--) {
108 x *= 37;
109 x += *cptr++;
110 }
111 return x & NFS4_DEVICE_ID_HASH_MASK;
112}
113
114struct pnfs_deviceid_node {
115 struct hlist_node de_node;
116 struct nfs4_deviceid de_id;
117 atomic_t de_ref;
118};
119
120struct pnfs_deviceid_cache {
121 spinlock_t dc_lock;
122 atomic_t dc_ref;
123 void (*dc_free_callback)(struct pnfs_deviceid_node *);
124 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
125};
126
127extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
128 void (*free_callback)(struct pnfs_deviceid_node *));
129extern void pnfs_put_deviceid_cache(struct nfs_client *);
130extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
131 struct pnfs_deviceid_cache *,
132 struct nfs4_deviceid *);
133extern struct pnfs_deviceid_node *pnfs_add_deviceid(
134 struct pnfs_deviceid_cache *,
135 struct pnfs_deviceid_node *);
136extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
137 struct pnfs_deviceid_node *devid);
138
139extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); 108extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
140extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); 109extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
141 110
@@ -146,11 +115,18 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
146 115
147/* pnfs.c */ 116/* pnfs.c */
148void get_layout_hdr(struct pnfs_layout_hdr *lo); 117void get_layout_hdr(struct pnfs_layout_hdr *lo);
118void put_lseg(struct pnfs_layout_segment *lseg);
149struct pnfs_layout_segment * 119struct pnfs_layout_segment *
150pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 120pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
151 enum pnfs_iomode access_type); 121 enum pnfs_iomode access_type);
152void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 122void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
153void unset_pnfs_layoutdriver(struct nfs_server *); 123void unset_pnfs_layoutdriver(struct nfs_server *);
124enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
125 const struct rpc_call_ops *, int);
126enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
127 const struct rpc_call_ops *);
128void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
129void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
154int pnfs_layout_process(struct nfs4_layoutget *lgp); 130int pnfs_layout_process(struct nfs4_layoutget *lgp);
155void pnfs_free_lseg_list(struct list_head *tmp_list); 131void pnfs_free_lseg_list(struct list_head *tmp_list);
156void pnfs_destroy_layout(struct nfs_inode *); 132void pnfs_destroy_layout(struct nfs_inode *);
@@ -177,6 +153,16 @@ static inline int lo_fail_bit(u32 iomode)
177 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
178} 154}
179 155
156static inline struct pnfs_layout_segment *
157get_lseg(struct pnfs_layout_segment *lseg)
158{
159 if (lseg) {
160 atomic_inc(&lseg->pls_refcount);
161 smp_mb__after_atomic_inc();
162 }
163 return lseg;
164}
165
180/* Return true if a layout driver is being used for this mountpoint */ 166/* Return true if a layout driver is being used for this mountpoint */
181static inline int pnfs_enabled_sb(struct nfs_server *nfss) 167static inline int pnfs_enabled_sb(struct nfs_server *nfss)
182{ 168{
@@ -194,12 +180,36 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
194} 180}
195 181
196static inline struct pnfs_layout_segment * 182static inline struct pnfs_layout_segment *
183get_lseg(struct pnfs_layout_segment *lseg)
184{
185 return NULL;
186}
187
188static inline void put_lseg(struct pnfs_layout_segment *lseg)
189{
190}
191
192static inline struct pnfs_layout_segment *
197pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 193pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
198 enum pnfs_iomode access_type) 194 enum pnfs_iomode access_type)
199{ 195{
200 return NULL; 196 return NULL;
201} 197}
202 198
199static inline enum pnfs_try_status
200pnfs_try_to_read_data(struct nfs_read_data *data,
201 const struct rpc_call_ops *call_ops)
202{
203 return PNFS_NOT_ATTEMPTED;
204}
205
206static inline enum pnfs_try_status
207pnfs_try_to_write_data(struct nfs_write_data *data,
208 const struct rpc_call_ops *call_ops, int how)
209{
210 return PNFS_NOT_ATTEMPTED;
211}
212
203static inline bool 213static inline bool
204pnfs_roc(struct inode *ino) 214pnfs_roc(struct inode *ino)
205{ 215{
@@ -230,6 +240,18 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
230{ 240{
231} 241}
232 242
243static inline void
244pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
245{
246 pgio->pg_test = NULL;
247}
248
249static inline void
250pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
251{
252 pgio->pg_test = NULL;
253}
254
233#endif /* CONFIG_NFS_V4_1 */ 255#endif /* CONFIG_NFS_V4_1 */
234 256
235#endif /* FS_NFS_PNFS_H */ 257#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 77d5e21c4ad6..b8ec170f2a0f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -741,4 +741,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
741 .lock = nfs_proc_lock, 741 .lock = nfs_proc_lock,
742 .lock_check_bounds = nfs_lock_check_bounds, 742 .lock_check_bounds = nfs_lock_check_bounds,
743 .close_context = nfs_close_context, 743 .close_context = nfs_close_context,
744 .init_client = nfs_init_client,
744}; 745};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index aedcaa7f291f..7cded2b12a05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,19 +18,20 @@
18#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
19#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
20#include <linux/nfs_page.h> 20#include <linux/nfs_page.h>
21#include <linux/module.h>
21 22
22#include <asm/system.h> 23#include <asm/system.h>
24#include "pnfs.h"
23 25
24#include "nfs4_fs.h" 26#include "nfs4_fs.h"
25#include "internal.h" 27#include "internal.h"
26#include "iostat.h" 28#include "iostat.h"
27#include "fscache.h" 29#include "fscache.h"
28#include "pnfs.h"
29 30
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 31#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 32
32static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); 33static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
33static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); 34static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
34static const struct rpc_call_ops nfs_read_partial_ops; 35static const struct rpc_call_ops nfs_read_partial_ops;
35static const struct rpc_call_ops nfs_read_full_ops; 36static const struct rpc_call_ops nfs_read_full_ops;
36 37
@@ -69,6 +70,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
69 70
70static void nfs_readdata_release(struct nfs_read_data *rdata) 71static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 72{
73 put_lseg(rdata->lseg);
72 put_nfs_open_context(rdata->args.context); 74 put_nfs_open_context(rdata->args.context);
73 nfs_readdata_free(rdata); 75 nfs_readdata_free(rdata);
74} 76}
@@ -114,14 +116,13 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
114int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 116int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
115 struct page *page) 117 struct page *page)
116{ 118{
117 LIST_HEAD(one_request);
118 struct nfs_page *new; 119 struct nfs_page *new;
119 unsigned int len; 120 unsigned int len;
121 struct nfs_pageio_descriptor pgio;
120 122
121 len = nfs_page_length(page); 123 len = nfs_page_length(page);
122 if (len == 0) 124 if (len == 0)
123 return nfs_return_empty_page(page); 125 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
125 new = nfs_create_request(ctx, inode, page, 0, len); 126 new = nfs_create_request(ctx, inode, page, 0, len);
126 if (IS_ERR(new)) { 127 if (IS_ERR(new)) {
127 unlock_page(page); 128 unlock_page(page);
@@ -130,11 +131,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
130 if (len < PAGE_CACHE_SIZE) 131 if (len < PAGE_CACHE_SIZE)
131 zero_user_segment(page, len, PAGE_CACHE_SIZE); 132 zero_user_segment(page, len, PAGE_CACHE_SIZE);
132 133
133 nfs_list_add_request(new, &one_request); 134 nfs_pageio_init(&pgio, inode, NULL, 0, 0);
135 nfs_list_add_request(new, &pgio.pg_list);
136 pgio.pg_count = len;
137
134 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 138 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
135 nfs_pagein_multi(inode, &one_request, 1, len, 0); 139 nfs_pagein_multi(&pgio);
136 else 140 else
137 nfs_pagein_one(inode, &one_request, 1, len, 0); 141 nfs_pagein_one(&pgio);
138 return 0; 142 return 0;
139} 143}
140 144
@@ -155,24 +159,20 @@ static void nfs_readpage_release(struct nfs_page *req)
155 nfs_release_request(req); 159 nfs_release_request(req);
156} 160}
157 161
158/* 162int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
159 * Set up the NFS read request struct 163 const struct rpc_call_ops *call_ops)
160 */
161static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
162 const struct rpc_call_ops *call_ops,
163 unsigned int count, unsigned int offset)
164{ 164{
165 struct inode *inode = req->wb_context->path.dentry->d_inode; 165 struct inode *inode = data->inode;
166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
167 struct rpc_task *task; 167 struct rpc_task *task;
168 struct rpc_message msg = { 168 struct rpc_message msg = {
169 .rpc_argp = &data->args, 169 .rpc_argp = &data->args,
170 .rpc_resp = &data->res, 170 .rpc_resp = &data->res,
171 .rpc_cred = req->wb_context->cred, 171 .rpc_cred = data->cred,
172 }; 172 };
173 struct rpc_task_setup task_setup_data = { 173 struct rpc_task_setup task_setup_data = {
174 .task = &data->task, 174 .task = &data->task,
175 .rpc_client = NFS_CLIENT(inode), 175 .rpc_client = clnt,
176 .rpc_message = &msg, 176 .rpc_message = &msg,
177 .callback_ops = call_ops, 177 .callback_ops = call_ops,
178 .callback_data = data, 178 .callback_data = data,
@@ -180,9 +180,39 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
180 .flags = RPC_TASK_ASYNC | swap_flags, 180 .flags = RPC_TASK_ASYNC | swap_flags,
181 }; 181 };
182 182
183 /* Set up the initial task struct. */
184 NFS_PROTO(inode)->read_setup(data, &msg);
185
186 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
187 "offset %llu)\n",
188 data->task.tk_pid,
189 inode->i_sb->s_id,
190 (long long)NFS_FILEID(inode),
191 data->args.count,
192 (unsigned long long)data->args.offset);
193
194 task = rpc_run_task(&task_setup_data);
195 if (IS_ERR(task))
196 return PTR_ERR(task);
197 rpc_put_task(task);
198 return 0;
199}
200EXPORT_SYMBOL_GPL(nfs_initiate_read);
201
202/*
203 * Set up the NFS read request struct
204 */
205static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
206 const struct rpc_call_ops *call_ops,
207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg)
209{
210 struct inode *inode = req->wb_context->path.dentry->d_inode;
211
183 data->req = req; 212 data->req = req;
184 data->inode = inode; 213 data->inode = inode;
185 data->cred = msg.rpc_cred; 214 data->cred = req->wb_context->cred;
215 data->lseg = get_lseg(lseg);
186 216
187 data->args.fh = NFS_FH(inode); 217 data->args.fh = NFS_FH(inode);
188 data->args.offset = req_offset(req) + offset; 218 data->args.offset = req_offset(req) + offset;
@@ -197,21 +227,11 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
197 data->res.eof = 0; 227 data->res.eof = 0;
198 nfs_fattr_init(&data->fattr); 228 nfs_fattr_init(&data->fattr);
199 229
200 /* Set up the initial task struct. */ 230 if (data->lseg &&
201 NFS_PROTO(inode)->read_setup(data, &msg); 231 (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
202 232 return 0;
203 dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
204 data->task.tk_pid,
205 inode->i_sb->s_id,
206 (long long)NFS_FILEID(inode),
207 count,
208 (unsigned long long)data->args.offset);
209 233
210 task = rpc_run_task(&task_setup_data); 234 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
211 if (IS_ERR(task))
212 return PTR_ERR(task);
213 rpc_put_task(task);
214 return 0;
215} 235}
216 236
217static void 237static void
@@ -240,20 +260,21 @@ nfs_async_read_error(struct list_head *head)
240 * won't see the new data until our attribute cache is updated. This is more 260 * won't see the new data until our attribute cache is updated. This is more
241 * or less conventional NFS client behavior. 261 * or less conventional NFS client behavior.
242 */ 262 */
243static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 263static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
244{ 264{
245 struct nfs_page *req = nfs_list_entry(head->next); 265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
246 struct page *page = req->wb_page; 266 struct page *page = req->wb_page;
247 struct nfs_read_data *data; 267 struct nfs_read_data *data;
248 size_t rsize = NFS_SERVER(inode)->rsize, nbytes; 268 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
249 unsigned int offset; 269 unsigned int offset;
250 int requests = 0; 270 int requests = 0;
251 int ret = 0; 271 int ret = 0;
272 struct pnfs_layout_segment *lseg;
252 LIST_HEAD(list); 273 LIST_HEAD(list);
253 274
254 nfs_list_remove_request(req); 275 nfs_list_remove_request(req);
255 276
256 nbytes = count; 277 nbytes = desc->pg_count;
257 do { 278 do {
258 size_t len = min(nbytes,rsize); 279 size_t len = min(nbytes,rsize);
259 280
@@ -266,9 +287,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
266 } while(nbytes != 0); 287 } while(nbytes != 0);
267 atomic_set(&req->wb_complete, requests); 288 atomic_set(&req->wb_complete, requests);
268 289
290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
269 ClearPageError(page); 292 ClearPageError(page);
270 offset = 0; 293 offset = 0;
271 nbytes = count; 294 nbytes = desc->pg_count;
272 do { 295 do {
273 int ret2; 296 int ret2;
274 297
@@ -280,12 +303,14 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
280 if (nbytes < rsize) 303 if (nbytes < rsize)
281 rsize = nbytes; 304 rsize = nbytes;
282 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 305 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
283 rsize, offset); 306 rsize, offset, lseg);
284 if (ret == 0) 307 if (ret == 0)
285 ret = ret2; 308 ret = ret2;
286 offset += rsize; 309 offset += rsize;
287 nbytes -= rsize; 310 nbytes -= rsize;
288 } while (nbytes != 0); 311 } while (nbytes != 0);
312 put_lseg(lseg);
313 desc->pg_lseg = NULL;
289 314
290 return ret; 315 return ret;
291 316
@@ -300,16 +325,21 @@ out_bad:
300 return -ENOMEM; 325 return -ENOMEM;
301} 326}
302 327
303static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 328static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
304{ 329{
305 struct nfs_page *req; 330 struct nfs_page *req;
306 struct page **pages; 331 struct page **pages;
307 struct nfs_read_data *data; 332 struct nfs_read_data *data;
333 struct list_head *head = &desc->pg_list;
334 struct pnfs_layout_segment *lseg = desc->pg_lseg;
308 int ret = -ENOMEM; 335 int ret = -ENOMEM;
309 336
310 data = nfs_readdata_alloc(npages); 337 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
311 if (!data) 338 desc->pg_count));
312 goto out_bad; 339 if (!data) {
340 nfs_async_read_error(head);
341 goto out;
342 }
313 343
314 pages = data->pagevec; 344 pages = data->pagevec;
315 while (!list_empty(head)) { 345 while (!list_empty(head)) {
@@ -320,10 +350,14 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
320 *pages++ = req->wb_page; 350 *pages++ = req->wb_page;
321 } 351 }
322 req = nfs_list_entry(data->pages.next); 352 req = nfs_list_entry(data->pages.next);
353 if ((!lseg) && list_is_singular(&data->pages))
354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
323 355
324 return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); 356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
325out_bad: 357 0, lseg);
326 nfs_async_read_error(head); 358out:
359 put_lseg(lseg);
360 desc->pg_lseg = NULL;
327 return ret; 361 return ret;
328} 362}
329 363
@@ -366,6 +400,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
366 return; 400 return;
367 401
368 /* Yes, so retry the read at the end of the data */ 402 /* Yes, so retry the read at the end of the data */
403 data->mds_offset += resp->count;
369 argp->offset += resp->count; 404 argp->offset += resp->count;
370 argp->pgbase += resp->count; 405 argp->pgbase += resp->count;
371 argp->count -= resp->count; 406 argp->count -= resp->count;
@@ -625,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
625 if (ret == 0) 660 if (ret == 0)
626 goto read_complete; /* all pages were read */ 661 goto read_complete; /* all pages were read */
627 662
628 pnfs_update_layout(inode, desc.ctx, IOMODE_READ); 663 pnfs_pageio_init_read(&pgio, inode);
629 if (rsize < PAGE_CACHE_SIZE) 664 if (rsize < PAGE_CACHE_SIZE)
630 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 665 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
631 else 666 else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b68c8607770f..2b8e9a5e366a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -263,8 +263,11 @@ static match_table_t nfs_local_lock_tokens = {
263static void nfs_umount_begin(struct super_block *); 263static void nfs_umount_begin(struct super_block *);
264static int nfs_statfs(struct dentry *, struct kstatfs *); 264static int nfs_statfs(struct dentry *, struct kstatfs *);
265static int nfs_show_options(struct seq_file *, struct vfsmount *); 265static int nfs_show_options(struct seq_file *, struct vfsmount *);
266static int nfs_show_devname(struct seq_file *, struct vfsmount *);
267static int nfs_show_path(struct seq_file *, struct vfsmount *);
266static int nfs_show_stats(struct seq_file *, struct vfsmount *); 268static int nfs_show_stats(struct seq_file *, struct vfsmount *);
267static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 269static struct dentry *nfs_fs_mount(struct file_system_type *,
270 int, const char *, void *);
268static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 271static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
269 int flags, const char *dev_name, void *raw_data); 272 int flags, const char *dev_name, void *raw_data);
270static void nfs_put_super(struct super_block *); 273static void nfs_put_super(struct super_block *);
@@ -274,7 +277,7 @@ static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
274static struct file_system_type nfs_fs_type = { 277static struct file_system_type nfs_fs_type = {
275 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
276 .name = "nfs", 279 .name = "nfs",
277 .get_sb = nfs_get_sb, 280 .mount = nfs_fs_mount,
278 .kill_sb = nfs_kill_super, 281 .kill_sb = nfs_kill_super,
279 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
280}; 283};
@@ -296,6 +299,8 @@ static const struct super_operations nfs_sops = {
296 .evict_inode = nfs_evict_inode, 299 .evict_inode = nfs_evict_inode,
297 .umount_begin = nfs_umount_begin, 300 .umount_begin = nfs_umount_begin,
298 .show_options = nfs_show_options, 301 .show_options = nfs_show_options,
302 .show_devname = nfs_show_devname,
303 .show_path = nfs_show_path,
299 .show_stats = nfs_show_stats, 304 .show_stats = nfs_show_stats,
300 .remount_fs = nfs_remount, 305 .remount_fs = nfs_remount,
301}; 306};
@@ -303,16 +308,16 @@ static const struct super_operations nfs_sops = {
303#ifdef CONFIG_NFS_V4 308#ifdef CONFIG_NFS_V4
304static int nfs4_validate_text_mount_data(void *options, 309static int nfs4_validate_text_mount_data(void *options,
305 struct nfs_parsed_mount_data *args, const char *dev_name); 310 struct nfs_parsed_mount_data *args, const char *dev_name);
306static int nfs4_try_mount(int flags, const char *dev_name, 311static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
307 struct nfs_parsed_mount_data *data, struct vfsmount *mnt); 312 struct nfs_parsed_mount_data *data);
308static int nfs4_get_sb(struct file_system_type *fs_type, 313static struct dentry *nfs4_mount(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 314 int flags, const char *dev_name, void *raw_data);
310static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 315static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
311 int flags, const char *dev_name, void *raw_data); 316 int flags, const char *dev_name, void *raw_data);
312static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 317static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
313 int flags, const char *dev_name, void *raw_data); 318 int flags, const char *dev_name, void *raw_data);
314static int nfs4_referral_get_sb(struct file_system_type *fs_type, 319static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
315 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 320 int flags, const char *dev_name, void *raw_data);
316static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, 321static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
317 int flags, const char *dev_name, void *raw_data); 322 int flags, const char *dev_name, void *raw_data);
318static void nfs4_kill_super(struct super_block *sb); 323static void nfs4_kill_super(struct super_block *sb);
@@ -320,7 +325,7 @@ static void nfs4_kill_super(struct super_block *sb);
320static struct file_system_type nfs4_fs_type = { 325static struct file_system_type nfs4_fs_type = {
321 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
322 .name = "nfs4", 327 .name = "nfs4",
323 .get_sb = nfs4_get_sb, 328 .mount = nfs4_mount,
324 .kill_sb = nfs4_kill_super, 329 .kill_sb = nfs4_kill_super,
325 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 330 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
326}; 331};
@@ -352,7 +357,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
352struct file_system_type nfs4_referral_fs_type = { 357struct file_system_type nfs4_referral_fs_type = {
353 .owner = THIS_MODULE, 358 .owner = THIS_MODULE,
354 .name = "nfs4", 359 .name = "nfs4",
355 .get_sb = nfs4_referral_get_sb, 360 .mount = nfs4_referral_mount,
356 .kill_sb = nfs4_kill_super, 361 .kill_sb = nfs4_kill_super,
357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 362 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
358}; 363};
@@ -366,6 +371,8 @@ static const struct super_operations nfs4_sops = {
366 .evict_inode = nfs4_evict_inode, 371 .evict_inode = nfs4_evict_inode,
367 .umount_begin = nfs_umount_begin, 372 .umount_begin = nfs_umount_begin,
368 .show_options = nfs_show_options, 373 .show_options = nfs_show_options,
374 .show_devname = nfs_show_devname,
375 .show_path = nfs_show_path,
369 .show_stats = nfs_show_stats, 376 .show_stats = nfs_show_stats,
370 .remount_fs = nfs_remount, 377 .remount_fs = nfs_remount,
371}; 378};
@@ -726,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
726 return 0; 733 return 0;
727} 734}
728 735
736static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
737{
738 char *page = (char *) __get_free_page(GFP_KERNEL);
739 char *devname, *dummy;
740 int err = 0;
741 if (!page)
742 return -ENOMEM;
743 devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE);
744 if (IS_ERR(devname))
745 err = PTR_ERR(devname);
746 else
747 seq_escape(m, devname, " \t\n\\");
748 free_page((unsigned long)page);
749 return err;
750}
751
752static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
753{
754 seq_puts(m, "/");
755 return 0;
756}
757
729/* 758/*
730 * Present statistical information for this VFS mountpoint 759 * Present statistical information for this VFS mountpoint
731 */ 760 */
@@ -979,6 +1008,27 @@ static int nfs_parse_security_flavors(char *value,
979 return 1; 1008 return 1;
980} 1009}
981 1010
1011static int nfs_get_option_str(substring_t args[], char **option)
1012{
1013 kfree(*option);
1014 *option = match_strdup(args);
1015 return !option;
1016}
1017
1018static int nfs_get_option_ul(substring_t args[], unsigned long *option)
1019{
1020 int rc;
1021 char *string;
1022
1023 string = match_strdup(args);
1024 if (string == NULL)
1025 return -ENOMEM;
1026 rc = strict_strtoul(string, 10, option);
1027 kfree(string);
1028
1029 return rc;
1030}
1031
982/* 1032/*
983 * Error-check and convert a string of mount options from user space into 1033 * Error-check and convert a string of mount options from user space into
984 * a data structure. The whole mount string is processed; bad options are 1034 * a data structure. The whole mount string is processed; bad options are
@@ -1127,155 +1177,82 @@ static int nfs_parse_mount_options(char *raw,
1127 * options that take numeric values 1177 * options that take numeric values
1128 */ 1178 */
1129 case Opt_port: 1179 case Opt_port:
1130 string = match_strdup(args); 1180 if (nfs_get_option_ul(args, &option) ||
1131 if (string == NULL) 1181 option > USHRT_MAX)
1132 goto out_nomem;
1133 rc = strict_strtoul(string, 10, &option);
1134 kfree(string);
1135 if (rc != 0 || option > USHRT_MAX)
1136 goto out_invalid_value; 1182 goto out_invalid_value;
1137 mnt->nfs_server.port = option; 1183 mnt->nfs_server.port = option;
1138 break; 1184 break;
1139 case Opt_rsize: 1185 case Opt_rsize:
1140 string = match_strdup(args); 1186 if (nfs_get_option_ul(args, &option))
1141 if (string == NULL)
1142 goto out_nomem;
1143 rc = strict_strtoul(string, 10, &option);
1144 kfree(string);
1145 if (rc != 0)
1146 goto out_invalid_value; 1187 goto out_invalid_value;
1147 mnt->rsize = option; 1188 mnt->rsize = option;
1148 break; 1189 break;
1149 case Opt_wsize: 1190 case Opt_wsize:
1150 string = match_strdup(args); 1191 if (nfs_get_option_ul(args, &option))
1151 if (string == NULL)
1152 goto out_nomem;
1153 rc = strict_strtoul(string, 10, &option);
1154 kfree(string);
1155 if (rc != 0)
1156 goto out_invalid_value; 1192 goto out_invalid_value;
1157 mnt->wsize = option; 1193 mnt->wsize = option;
1158 break; 1194 break;
1159 case Opt_bsize: 1195 case Opt_bsize:
1160 string = match_strdup(args); 1196 if (nfs_get_option_ul(args, &option))
1161 if (string == NULL)
1162 goto out_nomem;
1163 rc = strict_strtoul(string, 10, &option);
1164 kfree(string);
1165 if (rc != 0)
1166 goto out_invalid_value; 1197 goto out_invalid_value;
1167 mnt->bsize = option; 1198 mnt->bsize = option;
1168 break; 1199 break;
1169 case Opt_timeo: 1200 case Opt_timeo:
1170 string = match_strdup(args); 1201 if (nfs_get_option_ul(args, &option) || option == 0)
1171 if (string == NULL)
1172 goto out_nomem;
1173 rc = strict_strtoul(string, 10, &option);
1174 kfree(string);
1175 if (rc != 0 || option == 0)
1176 goto out_invalid_value; 1202 goto out_invalid_value;
1177 mnt->timeo = option; 1203 mnt->timeo = option;
1178 break; 1204 break;
1179 case Opt_retrans: 1205 case Opt_retrans:
1180 string = match_strdup(args); 1206 if (nfs_get_option_ul(args, &option) || option == 0)
1181 if (string == NULL)
1182 goto out_nomem;
1183 rc = strict_strtoul(string, 10, &option);
1184 kfree(string);
1185 if (rc != 0 || option == 0)
1186 goto out_invalid_value; 1207 goto out_invalid_value;
1187 mnt->retrans = option; 1208 mnt->retrans = option;
1188 break; 1209 break;
1189 case Opt_acregmin: 1210 case Opt_acregmin:
1190 string = match_strdup(args); 1211 if (nfs_get_option_ul(args, &option))
1191 if (string == NULL)
1192 goto out_nomem;
1193 rc = strict_strtoul(string, 10, &option);
1194 kfree(string);
1195 if (rc != 0)
1196 goto out_invalid_value; 1212 goto out_invalid_value;
1197 mnt->acregmin = option; 1213 mnt->acregmin = option;
1198 break; 1214 break;
1199 case Opt_acregmax: 1215 case Opt_acregmax:
1200 string = match_strdup(args); 1216 if (nfs_get_option_ul(args, &option))
1201 if (string == NULL)
1202 goto out_nomem;
1203 rc = strict_strtoul(string, 10, &option);
1204 kfree(string);
1205 if (rc != 0)
1206 goto out_invalid_value; 1217 goto out_invalid_value;
1207 mnt->acregmax = option; 1218 mnt->acregmax = option;
1208 break; 1219 break;
1209 case Opt_acdirmin: 1220 case Opt_acdirmin:
1210 string = match_strdup(args); 1221 if (nfs_get_option_ul(args, &option))
1211 if (string == NULL)
1212 goto out_nomem;
1213 rc = strict_strtoul(string, 10, &option);
1214 kfree(string);
1215 if (rc != 0)
1216 goto out_invalid_value; 1222 goto out_invalid_value;
1217 mnt->acdirmin = option; 1223 mnt->acdirmin = option;
1218 break; 1224 break;
1219 case Opt_acdirmax: 1225 case Opt_acdirmax:
1220 string = match_strdup(args); 1226 if (nfs_get_option_ul(args, &option))
1221 if (string == NULL)
1222 goto out_nomem;
1223 rc = strict_strtoul(string, 10, &option);
1224 kfree(string);
1225 if (rc != 0)
1226 goto out_invalid_value; 1227 goto out_invalid_value;
1227 mnt->acdirmax = option; 1228 mnt->acdirmax = option;
1228 break; 1229 break;
1229 case Opt_actimeo: 1230 case Opt_actimeo:
1230 string = match_strdup(args); 1231 if (nfs_get_option_ul(args, &option))
1231 if (string == NULL)
1232 goto out_nomem;
1233 rc = strict_strtoul(string, 10, &option);
1234 kfree(string);
1235 if (rc != 0)
1236 goto out_invalid_value; 1232 goto out_invalid_value;
1237 mnt->acregmin = mnt->acregmax = 1233 mnt->acregmin = mnt->acregmax =
1238 mnt->acdirmin = mnt->acdirmax = option; 1234 mnt->acdirmin = mnt->acdirmax = option;
1239 break; 1235 break;
1240 case Opt_namelen: 1236 case Opt_namelen:
1241 string = match_strdup(args); 1237 if (nfs_get_option_ul(args, &option))
1242 if (string == NULL)
1243 goto out_nomem;
1244 rc = strict_strtoul(string, 10, &option);
1245 kfree(string);
1246 if (rc != 0)
1247 goto out_invalid_value; 1238 goto out_invalid_value;
1248 mnt->namlen = option; 1239 mnt->namlen = option;
1249 break; 1240 break;
1250 case Opt_mountport: 1241 case Opt_mountport:
1251 string = match_strdup(args); 1242 if (nfs_get_option_ul(args, &option) ||
1252 if (string == NULL) 1243 option > USHRT_MAX)
1253 goto out_nomem;
1254 rc = strict_strtoul(string, 10, &option);
1255 kfree(string);
1256 if (rc != 0 || option > USHRT_MAX)
1257 goto out_invalid_value; 1244 goto out_invalid_value;
1258 mnt->mount_server.port = option; 1245 mnt->mount_server.port = option;
1259 break; 1246 break;
1260 case Opt_mountvers: 1247 case Opt_mountvers:
1261 string = match_strdup(args); 1248 if (nfs_get_option_ul(args, &option) ||
1262 if (string == NULL)
1263 goto out_nomem;
1264 rc = strict_strtoul(string, 10, &option);
1265 kfree(string);
1266 if (rc != 0 ||
1267 option < NFS_MNT_VERSION || 1249 option < NFS_MNT_VERSION ||
1268 option > NFS_MNT3_VERSION) 1250 option > NFS_MNT3_VERSION)
1269 goto out_invalid_value; 1251 goto out_invalid_value;
1270 mnt->mount_server.version = option; 1252 mnt->mount_server.version = option;
1271 break; 1253 break;
1272 case Opt_nfsvers: 1254 case Opt_nfsvers:
1273 string = match_strdup(args); 1255 if (nfs_get_option_ul(args, &option))
1274 if (string == NULL)
1275 goto out_nomem;
1276 rc = strict_strtoul(string, 10, &option);
1277 kfree(string);
1278 if (rc != 0)
1279 goto out_invalid_value; 1256 goto out_invalid_value;
1280 switch (option) { 1257 switch (option) {
1281 case NFS2_VERSION: 1258 case NFS2_VERSION:
@@ -1295,12 +1272,7 @@ static int nfs_parse_mount_options(char *raw,
1295 } 1272 }
1296 break; 1273 break;
1297 case Opt_minorversion: 1274 case Opt_minorversion:
1298 string = match_strdup(args); 1275 if (nfs_get_option_ul(args, &option))
1299 if (string == NULL)
1300 goto out_nomem;
1301 rc = strict_strtoul(string, 10, &option);
1302 kfree(string);
1303 if (rc != 0)
1304 goto out_invalid_value; 1276 goto out_invalid_value;
1305 if (option > NFS4_MAX_MINOR_VERSION) 1277 if (option > NFS4_MAX_MINOR_VERSION)
1306 goto out_invalid_value; 1278 goto out_invalid_value;
@@ -1336,21 +1308,18 @@ static int nfs_parse_mount_options(char *raw,
1336 case Opt_xprt_udp: 1308 case Opt_xprt_udp:
1337 mnt->flags &= ~NFS_MOUNT_TCP; 1309 mnt->flags &= ~NFS_MOUNT_TCP;
1338 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1310 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1339 kfree(string);
1340 break; 1311 break;
1341 case Opt_xprt_tcp6: 1312 case Opt_xprt_tcp6:
1342 protofamily = AF_INET6; 1313 protofamily = AF_INET6;
1343 case Opt_xprt_tcp: 1314 case Opt_xprt_tcp:
1344 mnt->flags |= NFS_MOUNT_TCP; 1315 mnt->flags |= NFS_MOUNT_TCP;
1345 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1316 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1346 kfree(string);
1347 break; 1317 break;
1348 case Opt_xprt_rdma: 1318 case Opt_xprt_rdma:
1349 /* vector side protocols to TCP */ 1319 /* vector side protocols to TCP */
1350 mnt->flags |= NFS_MOUNT_TCP; 1320 mnt->flags |= NFS_MOUNT_TCP;
1351 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1321 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1352 xprt_load_transport(string); 1322 xprt_load_transport(string);
1353 kfree(string);
1354 break; 1323 break;
1355 default: 1324 default:
1356 dfprintk(MOUNT, "NFS: unrecognized " 1325 dfprintk(MOUNT, "NFS: unrecognized "
@@ -1358,6 +1327,7 @@ static int nfs_parse_mount_options(char *raw,
1358 kfree(string); 1327 kfree(string);
1359 return 0; 1328 return 0;
1360 } 1329 }
1330 kfree(string);
1361 break; 1331 break;
1362 case Opt_mountproto: 1332 case Opt_mountproto:
1363 string = match_strdup(args); 1333 string = match_strdup(args);
@@ -1400,18 +1370,13 @@ static int nfs_parse_mount_options(char *raw,
1400 goto out_invalid_address; 1370 goto out_invalid_address;
1401 break; 1371 break;
1402 case Opt_clientaddr: 1372 case Opt_clientaddr:
1403 string = match_strdup(args); 1373 if (nfs_get_option_str(args, &mnt->client_address))
1404 if (string == NULL)
1405 goto out_nomem; 1374 goto out_nomem;
1406 kfree(mnt->client_address);
1407 mnt->client_address = string;
1408 break; 1375 break;
1409 case Opt_mounthost: 1376 case Opt_mounthost:
1410 string = match_strdup(args); 1377 if (nfs_get_option_str(args,
1411 if (string == NULL) 1378 &mnt->mount_server.hostname))
1412 goto out_nomem; 1379 goto out_nomem;
1413 kfree(mnt->mount_server.hostname);
1414 mnt->mount_server.hostname = string;
1415 break; 1380 break;
1416 case Opt_mountaddr: 1381 case Opt_mountaddr:
1417 string = match_strdup(args); 1382 string = match_strdup(args);
@@ -1451,11 +1416,8 @@ static int nfs_parse_mount_options(char *raw,
1451 }; 1416 };
1452 break; 1417 break;
1453 case Opt_fscache_uniq: 1418 case Opt_fscache_uniq:
1454 string = match_strdup(args); 1419 if (nfs_get_option_str(args, &mnt->fscache_uniq))
1455 if (string == NULL)
1456 goto out_nomem; 1420 goto out_nomem;
1457 kfree(mnt->fscache_uniq);
1458 mnt->fscache_uniq = string;
1459 mnt->options |= NFS_OPTION_FSCACHE; 1421 mnt->options |= NFS_OPTION_FSCACHE;
1460 break; 1422 break;
1461 case Opt_local_lock: 1423 case Opt_local_lock:
@@ -1665,99 +1627,59 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1665 return nfs_walk_authlist(args, &request); 1627 return nfs_walk_authlist(args, &request);
1666} 1628}
1667 1629
1668static int nfs_parse_simple_hostname(const char *dev_name, 1630/*
1669 char **hostname, size_t maxnamlen, 1631 * Split "dev_name" into "hostname:export_path".
1670 char **export_path, size_t maxpathlen) 1632 *
1633 * The leftmost colon demarks the split between the server's hostname
1634 * and the export path. If the hostname starts with a left square
1635 * bracket, then it may contain colons.
1636 *
1637 * Note: caller frees hostname and export path, even on error.
1638 */
1639static int nfs_parse_devname(const char *dev_name,
1640 char **hostname, size_t maxnamlen,
1641 char **export_path, size_t maxpathlen)
1671{ 1642{
1672 size_t len; 1643 size_t len;
1673 char *colon, *comma; 1644 char *end;
1674
1675 colon = strchr(dev_name, ':');
1676 if (colon == NULL)
1677 goto out_bad_devname;
1678
1679 len = colon - dev_name;
1680 if (len > maxnamlen)
1681 goto out_hostname;
1682 1645
1683 /* N.B. caller will free nfs_server.hostname in all cases */ 1646 /* Is the host name protected with square brackets? */
1684 *hostname = kstrndup(dev_name, len, GFP_KERNEL); 1647 if (*dev_name == '[') {
1685 if (!*hostname) 1648 end = strchr(++dev_name, ']');
1686 goto out_nomem; 1649 if (end == NULL || end[1] != ':')
1687
1688 /* kill possible hostname list: not supported */
1689 comma = strchr(*hostname, ',');
1690 if (comma != NULL) {
1691 if (comma == *hostname)
1692 goto out_bad_devname; 1650 goto out_bad_devname;
1693 *comma = '\0';
1694 }
1695 1651
1696 colon++; 1652 len = end - dev_name;
1697 len = strlen(colon); 1653 end++;
1698 if (len > maxpathlen) 1654 } else {
1699 goto out_path; 1655 char *comma;
1700 *export_path = kstrndup(colon, len, GFP_KERNEL);
1701 if (!*export_path)
1702 goto out_nomem;
1703
1704 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1705 return 0;
1706
1707out_bad_devname:
1708 dfprintk(MOUNT, "NFS: device name not in host:path format\n");
1709 return -EINVAL;
1710
1711out_nomem:
1712 dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
1713 return -ENOMEM;
1714
1715out_hostname:
1716 dfprintk(MOUNT, "NFS: server hostname too long\n");
1717 return -ENAMETOOLONG;
1718
1719out_path:
1720 dfprintk(MOUNT, "NFS: export pathname too long\n");
1721 return -ENAMETOOLONG;
1722}
1723
1724/*
1725 * Hostname has square brackets around it because it contains one or
1726 * more colons. We look for the first closing square bracket, and a
1727 * colon must follow it.
1728 */
1729static int nfs_parse_protected_hostname(const char *dev_name,
1730 char **hostname, size_t maxnamlen,
1731 char **export_path, size_t maxpathlen)
1732{
1733 size_t len;
1734 char *start, *end;
1735 1656
1736 start = (char *)(dev_name + 1); 1657 end = strchr(dev_name, ':');
1658 if (end == NULL)
1659 goto out_bad_devname;
1660 len = end - dev_name;
1737 1661
1738 end = strchr(start, ']'); 1662 /* kill possible hostname list: not supported */
1739 if (end == NULL) 1663 comma = strchr(dev_name, ',');
1740 goto out_bad_devname; 1664 if (comma != NULL && comma < end)
1741 if (*(end + 1) != ':') 1665 *comma = 0;
1742 goto out_bad_devname; 1666 }
1743 1667
1744 len = end - start;
1745 if (len > maxnamlen) 1668 if (len > maxnamlen)
1746 goto out_hostname; 1669 goto out_hostname;
1747 1670
1748 /* N.B. caller will free nfs_server.hostname in all cases */ 1671 /* N.B. caller will free nfs_server.hostname in all cases */
1749 *hostname = kstrndup(start, len, GFP_KERNEL); 1672 *hostname = kstrndup(dev_name, len, GFP_KERNEL);
1750 if (*hostname == NULL) 1673 if (*hostname == NULL)
1751 goto out_nomem; 1674 goto out_nomem;
1752 1675 len = strlen(++end);
1753 end += 2;
1754 len = strlen(end);
1755 if (len > maxpathlen) 1676 if (len > maxpathlen)
1756 goto out_path; 1677 goto out_path;
1757 *export_path = kstrndup(end, len, GFP_KERNEL); 1678 *export_path = kstrndup(end, len, GFP_KERNEL);
1758 if (!*export_path) 1679 if (!*export_path)
1759 goto out_nomem; 1680 goto out_nomem;
1760 1681
1682 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1761 return 0; 1683 return 0;
1762 1684
1763out_bad_devname: 1685out_bad_devname:
@@ -1778,29 +1700,6 @@ out_path:
1778} 1700}
1779 1701
1780/* 1702/*
1781 * Split "dev_name" into "hostname:export_path".
1782 *
1783 * The leftmost colon demarks the split between the server's hostname
1784 * and the export path. If the hostname starts with a left square
1785 * bracket, then it may contain colons.
1786 *
1787 * Note: caller frees hostname and export path, even on error.
1788 */
1789static int nfs_parse_devname(const char *dev_name,
1790 char **hostname, size_t maxnamlen,
1791 char **export_path, size_t maxpathlen)
1792{
1793 if (*dev_name == '[')
1794 return nfs_parse_protected_hostname(dev_name,
1795 hostname, maxnamlen,
1796 export_path, maxpathlen);
1797
1798 return nfs_parse_simple_hostname(dev_name,
1799 hostname, maxnamlen,
1800 export_path, maxpathlen);
1801}
1802
1803/*
1804 * Validate the NFS2/NFS3 mount data 1703 * Validate the NFS2/NFS3 mount data
1805 * - fills in the mount root filehandle 1704 * - fills in the mount root filehandle
1806 * 1705 *
@@ -2267,19 +2166,19 @@ static int nfs_bdi_register(struct nfs_server *server)
2267 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2166 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2268} 2167}
2269 2168
2270static int nfs_get_sb(struct file_system_type *fs_type, 2169static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2271 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2170 int flags, const char *dev_name, void *raw_data)
2272{ 2171{
2273 struct nfs_server *server = NULL; 2172 struct nfs_server *server = NULL;
2274 struct super_block *s; 2173 struct super_block *s;
2275 struct nfs_parsed_mount_data *data; 2174 struct nfs_parsed_mount_data *data;
2276 struct nfs_fh *mntfh; 2175 struct nfs_fh *mntfh;
2277 struct dentry *mntroot; 2176 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2278 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2177 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2279 struct nfs_sb_mountdata sb_mntdata = { 2178 struct nfs_sb_mountdata sb_mntdata = {
2280 .mntflags = flags, 2179 .mntflags = flags,
2281 }; 2180 };
2282 int error = -ENOMEM; 2181 int error;
2283 2182
2284 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); 2183 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2285 mntfh = nfs_alloc_fhandle(); 2184 mntfh = nfs_alloc_fhandle();
@@ -2290,12 +2189,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2290 2189
2291 /* Validate the mount data */ 2190 /* Validate the mount data */
2292 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); 2191 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2293 if (error < 0) 2192 if (error < 0) {
2193 mntroot = ERR_PTR(error);
2294 goto out; 2194 goto out;
2195 }
2295 2196
2296#ifdef CONFIG_NFS_V4 2197#ifdef CONFIG_NFS_V4
2297 if (data->version == 4) { 2198 if (data->version == 4) {
2298 error = nfs4_try_mount(flags, dev_name, data, mnt); 2199 mntroot = nfs4_try_mount(flags, dev_name, data);
2299 kfree(data->client_address); 2200 kfree(data->client_address);
2300 kfree(data->nfs_server.export_path); 2201 kfree(data->nfs_server.export_path);
2301 goto out; 2202 goto out;
@@ -2305,7 +2206,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2305 /* Get a volume representation */ 2206 /* Get a volume representation */
2306 server = nfs_create_server(data, mntfh); 2207 server = nfs_create_server(data, mntfh);
2307 if (IS_ERR(server)) { 2208 if (IS_ERR(server)) {
2308 error = PTR_ERR(server); 2209 mntroot = ERR_CAST(server);
2309 goto out; 2210 goto out;
2310 } 2211 }
2311 sb_mntdata.server = server; 2212 sb_mntdata.server = server;
@@ -2316,7 +2217,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2316 /* Get a superblock - note that we may end up sharing one that already exists */ 2217 /* Get a superblock - note that we may end up sharing one that already exists */
2317 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); 2218 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
2318 if (IS_ERR(s)) { 2219 if (IS_ERR(s)) {
2319 error = PTR_ERR(s); 2220 mntroot = ERR_CAST(s);
2320 goto out_err_nosb; 2221 goto out_err_nosb;
2321 } 2222 }
2322 2223
@@ -2325,8 +2226,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2325 server = NULL; 2226 server = NULL;
2326 } else { 2227 } else {
2327 error = nfs_bdi_register(server); 2228 error = nfs_bdi_register(server);
2328 if (error) 2229 if (error) {
2230 mntroot = ERR_PTR(error);
2329 goto error_splat_bdi; 2231 goto error_splat_bdi;
2232 }
2330 } 2233 }
2331 2234
2332 if (!s->s_root) { 2235 if (!s->s_root) {
@@ -2336,20 +2239,15 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2336 s, data ? data->fscache_uniq : NULL, NULL); 2239 s, data ? data->fscache_uniq : NULL, NULL);
2337 } 2240 }
2338 2241
2339 mntroot = nfs_get_root(s, mntfh); 2242 mntroot = nfs_get_root(s, mntfh, dev_name);
2340 if (IS_ERR(mntroot)) { 2243 if (IS_ERR(mntroot))
2341 error = PTR_ERR(mntroot);
2342 goto error_splat_super; 2244 goto error_splat_super;
2343 }
2344 2245
2345 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2246 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2346 if (error) 2247 if (error)
2347 goto error_splat_root; 2248 goto error_splat_root;
2348 2249
2349 s->s_flags |= MS_ACTIVE; 2250 s->s_flags |= MS_ACTIVE;
2350 mnt->mnt_sb = s;
2351 mnt->mnt_root = mntroot;
2352 error = 0;
2353 2251
2354out: 2252out:
2355 kfree(data->nfs_server.hostname); 2253 kfree(data->nfs_server.hostname);
@@ -2359,7 +2257,7 @@ out:
2359out_free_fh: 2257out_free_fh:
2360 nfs_free_fhandle(mntfh); 2258 nfs_free_fhandle(mntfh);
2361 kfree(data); 2259 kfree(data);
2362 return error; 2260 return mntroot;
2363 2261
2364out_err_nosb: 2262out_err_nosb:
2365 nfs_free_server(server); 2263 nfs_free_server(server);
@@ -2367,6 +2265,7 @@ out_err_nosb:
2367 2265
2368error_splat_root: 2266error_splat_root:
2369 dput(mntroot); 2267 dput(mntroot);
2268 mntroot = ERR_PTR(error);
2370error_splat_super: 2269error_splat_super:
2371 if (server && !s->s_root) 2270 if (server && !s->s_root)
2372 bdi_unregister(&server->backing_dev_info); 2271 bdi_unregister(&server->backing_dev_info);
@@ -2450,7 +2349,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2450 nfs_fscache_get_super_cookie(s, NULL, data); 2349 nfs_fscache_get_super_cookie(s, NULL, data);
2451 } 2350 }
2452 2351
2453 mntroot = nfs_get_root(s, data->fh); 2352 mntroot = nfs_get_root(s, data->fh, dev_name);
2454 if (IS_ERR(mntroot)) { 2353 if (IS_ERR(mntroot)) {
2455 error = PTR_ERR(mntroot); 2354 error = PTR_ERR(mntroot);
2456 goto error_splat_super; 2355 goto error_splat_super;
@@ -2718,7 +2617,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2718 s, data ? data->fscache_uniq : NULL, NULL); 2617 s, data ? data->fscache_uniq : NULL, NULL);
2719 } 2618 }
2720 2619
2721 mntroot = nfs4_get_root(s, mntfh); 2620 mntroot = nfs4_get_root(s, mntfh, dev_name);
2722 if (IS_ERR(mntroot)) { 2621 if (IS_ERR(mntroot)) {
2723 error = PTR_ERR(mntroot); 2622 error = PTR_ERR(mntroot);
2724 goto error_splat_super; 2623 goto error_splat_super;
@@ -2771,27 +2670,6 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
2771 return root_mnt; 2670 return root_mnt;
2772} 2671}
2773 2672
2774static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2775{
2776 char *page = (char *) __get_free_page(GFP_KERNEL);
2777 char *devname, *tmp;
2778
2779 if (page == NULL)
2780 return;
2781 devname = nfs_path(path->mnt->mnt_devname,
2782 path->mnt->mnt_root, path->dentry,
2783 page, PAGE_SIZE);
2784 if (IS_ERR(devname))
2785 goto out_freepage;
2786 tmp = kstrdup(devname, GFP_KERNEL);
2787 if (tmp == NULL)
2788 goto out_freepage;
2789 kfree(mnt->mnt_devname);
2790 mnt->mnt_devname = tmp;
2791out_freepage:
2792 free_page((unsigned long)page);
2793}
2794
2795struct nfs_referral_count { 2673struct nfs_referral_count {
2796 struct list_head list; 2674 struct list_head list;
2797 const struct task_struct *task; 2675 const struct task_struct *task;
@@ -2858,17 +2736,18 @@ static void nfs_referral_loop_unprotect(void)
2858 kfree(p); 2736 kfree(p);
2859} 2737}
2860 2738
2861static int nfs_follow_remote_path(struct vfsmount *root_mnt, 2739static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2862 const char *export_path, struct vfsmount *mnt_target) 2740 const char *export_path)
2863{ 2741{
2864 struct nameidata *nd = NULL; 2742 struct nameidata *nd = NULL;
2865 struct mnt_namespace *ns_private; 2743 struct mnt_namespace *ns_private;
2866 struct super_block *s; 2744 struct super_block *s;
2745 struct dentry *dentry;
2867 int ret; 2746 int ret;
2868 2747
2869 nd = kmalloc(sizeof(*nd), GFP_KERNEL); 2748 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2870 if (nd == NULL) 2749 if (nd == NULL)
2871 return -ENOMEM; 2750 return ERR_PTR(-ENOMEM);
2872 2751
2873 ns_private = create_mnt_ns(root_mnt); 2752 ns_private = create_mnt_ns(root_mnt);
2874 ret = PTR_ERR(ns_private); 2753 ret = PTR_ERR(ns_private);
@@ -2890,32 +2769,27 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
2890 2769
2891 s = nd->path.mnt->mnt_sb; 2770 s = nd->path.mnt->mnt_sb;
2892 atomic_inc(&s->s_active); 2771 atomic_inc(&s->s_active);
2893 mnt_target->mnt_sb = s; 2772 dentry = dget(nd->path.dentry);
2894 mnt_target->mnt_root = dget(nd->path.dentry);
2895
2896 /* Correct the device pathname */
2897 nfs_fix_devname(&nd->path, mnt_target);
2898 2773
2899 path_put(&nd->path); 2774 path_put(&nd->path);
2900 kfree(nd); 2775 kfree(nd);
2901 down_write(&s->s_umount); 2776 down_write(&s->s_umount);
2902 return 0; 2777 return dentry;
2903out_put_mnt_ns: 2778out_put_mnt_ns:
2904 put_mnt_ns(ns_private); 2779 put_mnt_ns(ns_private);
2905out_mntput: 2780out_mntput:
2906 mntput(root_mnt); 2781 mntput(root_mnt);
2907out_err: 2782out_err:
2908 kfree(nd); 2783 kfree(nd);
2909 return ret; 2784 return ERR_PTR(ret);
2910} 2785}
2911 2786
2912static int nfs4_try_mount(int flags, const char *dev_name, 2787static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2913 struct nfs_parsed_mount_data *data, 2788 struct nfs_parsed_mount_data *data)
2914 struct vfsmount *mnt)
2915{ 2789{
2916 char *export_path; 2790 char *export_path;
2917 struct vfsmount *root_mnt; 2791 struct vfsmount *root_mnt;
2918 int error; 2792 struct dentry *res;
2919 2793
2920 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2794 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2921 2795
@@ -2925,26 +2799,25 @@ static int nfs4_try_mount(int flags, const char *dev_name,
2925 data->nfs_server.hostname); 2799 data->nfs_server.hostname);
2926 data->nfs_server.export_path = export_path; 2800 data->nfs_server.export_path = export_path;
2927 2801
2928 error = PTR_ERR(root_mnt); 2802 res = ERR_CAST(root_mnt);
2929 if (IS_ERR(root_mnt)) 2803 if (!IS_ERR(root_mnt))
2930 goto out; 2804 res = nfs_follow_remote_path(root_mnt, export_path);
2931
2932 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2933 2805
2934out: 2806 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
2935 dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error, 2807 IS_ERR(res) ? PTR_ERR(res) : 0,
2936 error != 0 ? " [error]" : ""); 2808 IS_ERR(res) ? " [error]" : "");
2937 return error; 2809 return res;
2938} 2810}
2939 2811
2940/* 2812/*
2941 * Get the superblock for an NFS4 mountpoint 2813 * Get the superblock for an NFS4 mountpoint
2942 */ 2814 */
2943static int nfs4_get_sb(struct file_system_type *fs_type, 2815static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2944 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2816 int flags, const char *dev_name, void *raw_data)
2945{ 2817{
2946 struct nfs_parsed_mount_data *data; 2818 struct nfs_parsed_mount_data *data;
2947 int error = -ENOMEM; 2819 int error = -ENOMEM;
2820 struct dentry *res = ERR_PTR(-ENOMEM);
2948 2821
2949 data = nfs_alloc_parsed_mount_data(4); 2822 data = nfs_alloc_parsed_mount_data(4);
2950 if (data == NULL) 2823 if (data == NULL)
@@ -2952,10 +2825,14 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2952 2825
2953 /* Validate the mount data */ 2826 /* Validate the mount data */
2954 error = nfs4_validate_mount_data(raw_data, data, dev_name); 2827 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2955 if (error < 0) 2828 if (error < 0) {
2829 res = ERR_PTR(error);
2956 goto out; 2830 goto out;
2831 }
2957 2832
2958 error = nfs4_try_mount(flags, dev_name, data, mnt); 2833 res = nfs4_try_mount(flags, dev_name, data);
2834 if (IS_ERR(res))
2835 error = PTR_ERR(res);
2959 2836
2960out: 2837out:
2961 kfree(data->client_address); 2838 kfree(data->client_address);
@@ -2964,9 +2841,9 @@ out:
2964 kfree(data->fscache_uniq); 2841 kfree(data->fscache_uniq);
2965out_free_data: 2842out_free_data:
2966 kfree(data); 2843 kfree(data);
2967 dprintk("<-- nfs4_get_sb() = %d%s\n", error, 2844 dprintk("<-- nfs4_mount() = %d%s\n", error,
2968 error != 0 ? " [error]" : ""); 2845 error != 0 ? " [error]" : "");
2969 return error; 2846 return res;
2970} 2847}
2971 2848
2972static void nfs4_kill_super(struct super_block *sb) 2849static void nfs4_kill_super(struct super_block *sb)
@@ -3033,7 +2910,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
3033 nfs_fscache_get_super_cookie(s, NULL, data); 2910 nfs_fscache_get_super_cookie(s, NULL, data);
3034 } 2911 }
3035 2912
3036 mntroot = nfs4_get_root(s, data->fh); 2913 mntroot = nfs4_get_root(s, data->fh, dev_name);
3037 if (IS_ERR(mntroot)) { 2914 if (IS_ERR(mntroot)) {
3038 error = PTR_ERR(mntroot); 2915 error = PTR_ERR(mntroot);
3039 goto error_splat_super; 2916 goto error_splat_super;
@@ -3120,7 +2997,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3120 nfs_fscache_get_super_cookie(s, NULL, data); 2997 nfs_fscache_get_super_cookie(s, NULL, data);
3121 } 2998 }
3122 2999
3123 mntroot = nfs4_get_root(s, mntfh); 3000 mntroot = nfs4_get_root(s, mntfh, dev_name);
3124 if (IS_ERR(mntroot)) { 3001 if (IS_ERR(mntroot)) {
3125 error = PTR_ERR(mntroot); 3002 error = PTR_ERR(mntroot);
3126 goto error_splat_super; 3003 goto error_splat_super;
@@ -3160,16 +3037,15 @@ error_splat_bdi:
3160/* 3037/*
3161 * Create an NFS4 server record on referral traversal 3038 * Create an NFS4 server record on referral traversal
3162 */ 3039 */
3163static int nfs4_referral_get_sb(struct file_system_type *fs_type, 3040static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
3164 int flags, const char *dev_name, void *raw_data, 3041 int flags, const char *dev_name, void *raw_data)
3165 struct vfsmount *mnt)
3166{ 3042{
3167 struct nfs_clone_mount *data = raw_data; 3043 struct nfs_clone_mount *data = raw_data;
3168 char *export_path; 3044 char *export_path;
3169 struct vfsmount *root_mnt; 3045 struct vfsmount *root_mnt;
3170 int error; 3046 struct dentry *res;
3171 3047
3172 dprintk("--> nfs4_referral_get_sb()\n"); 3048 dprintk("--> nfs4_referral_mount()\n");
3173 3049
3174 export_path = data->mnt_path; 3050 export_path = data->mnt_path;
3175 data->mnt_path = "/"; 3051 data->mnt_path = "/";
@@ -3178,15 +3054,13 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type,
3178 flags, data, data->hostname); 3054 flags, data, data->hostname);
3179 data->mnt_path = export_path; 3055 data->mnt_path = export_path;
3180 3056
3181 error = PTR_ERR(root_mnt); 3057 res = ERR_CAST(root_mnt);
3182 if (IS_ERR(root_mnt)) 3058 if (!IS_ERR(root_mnt))
3183 goto out; 3059 res = nfs_follow_remote_path(root_mnt, export_path);
3184 3060 dprintk("<-- nfs4_referral_mount() = %ld%s\n",
3185 error = nfs_follow_remote_path(root_mnt, export_path, mnt); 3061 IS_ERR(res) ? PTR_ERR(res) : 0,
3186out: 3062 IS_ERR(res) ? " [error]" : "");
3187 dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, 3063 return res;
3188 error != 0 ? " [error]" : "");
3189 return error;
3190} 3064}
3191 3065
3192#endif /* CONFIG_NFS_V4 */ 3066#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..8d6864c2a5fa 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -148,6 +148,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
148 alias = d_lookup(parent, &data->args.name); 148 alias = d_lookup(parent, &data->args.name);
149 if (alias != NULL) { 149 if (alias != NULL) {
150 int ret = 0; 150 int ret = 0;
151 void *devname_garbage = NULL;
151 152
152 /* 153 /*
153 * Hey, we raced with lookup... See if we need to transfer 154 * Hey, we raced with lookup... See if we need to transfer
@@ -157,6 +158,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
157 spin_lock(&alias->d_lock); 158 spin_lock(&alias->d_lock);
158 if (alias->d_inode != NULL && 159 if (alias->d_inode != NULL &&
159 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { 160 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
161 devname_garbage = alias->d_fsdata;
160 alias->d_fsdata = data; 162 alias->d_fsdata = data;
161 alias->d_flags |= DCACHE_NFSFS_RENAMED; 163 alias->d_flags |= DCACHE_NFSFS_RENAMED;
162 ret = 1; 164 ret = 1;
@@ -164,6 +166,13 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
164 spin_unlock(&alias->d_lock); 166 spin_unlock(&alias->d_lock);
165 nfs_dec_sillycount(dir); 167 nfs_dec_sillycount(dir);
166 dput(alias); 168 dput(alias);
169 /*
170 * If we'd displaced old cached devname, free it. At that
171 * point dentry is definitely not a root, so we won't need
172 * that anymore.
173 */
174 if (devname_garbage)
175 kfree(devname_garbage);
167 return ret; 176 return ret;
168 } 177 }
169 data->dir = igrab(dir); 178 data->dir = igrab(dir);
@@ -180,7 +189,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 189 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 190 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 191 if (!IS_ERR(task))
183 rpc_put_task(task); 192 rpc_put_task_async(task);
184 return 1; 193 return 1;
185} 194}
186 195
@@ -252,6 +261,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
252{ 261{
253 struct nfs_unlinkdata *data; 262 struct nfs_unlinkdata *data;
254 int status = -ENOMEM; 263 int status = -ENOMEM;
264 void *devname_garbage = NULL;
255 265
256 data = kzalloc(sizeof(*data), GFP_KERNEL); 266 data = kzalloc(sizeof(*data), GFP_KERNEL);
257 if (data == NULL) 267 if (data == NULL)
@@ -269,8 +279,16 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
269 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) 279 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
270 goto out_unlock; 280 goto out_unlock;
271 dentry->d_flags |= DCACHE_NFSFS_RENAMED; 281 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
282 devname_garbage = dentry->d_fsdata;
272 dentry->d_fsdata = data; 283 dentry->d_fsdata = data;
273 spin_unlock(&dentry->d_lock); 284 spin_unlock(&dentry->d_lock);
285 /*
286 * If we'd displaced old cached devname, free it. At that
287 * point dentry is definitely not a root, so we won't need
288 * that anymore.
289 */
290 if (devname_garbage)
291 kfree(devname_garbage);
274 return 0; 292 return 0;
275out_unlock: 293out_unlock:
276 spin_unlock(&dentry->d_lock); 294 spin_unlock(&dentry->d_lock);
@@ -299,6 +317,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
299 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { 317 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
300 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 318 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
301 data = dentry->d_fsdata; 319 data = dentry->d_fsdata;
320 dentry->d_fsdata = NULL;
302 } 321 }
303 spin_unlock(&dentry->d_lock); 322 spin_unlock(&dentry->d_lock);
304 323
@@ -315,6 +334,7 @@ nfs_cancel_async_unlink(struct dentry *dentry)
315 struct nfs_unlinkdata *data = dentry->d_fsdata; 334 struct nfs_unlinkdata *data = dentry->d_fsdata;
316 335
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 336 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
337 dentry->d_fsdata = NULL;
318 spin_unlock(&dentry->d_lock); 338 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data); 339 nfs_free_unlinkdata(data);
320 return; 340 return;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..47a3ad63e0d5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -28,6 +28,7 @@
28#include "iostat.h" 28#include "iostat.h"
29#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h" 30#include "fscache.h"
31#include "pnfs.h"
31 32
32#define NFSDBG_FACILITY NFSDBG_PAGECACHE 33#define NFSDBG_FACILITY NFSDBG_PAGECACHE
33 34
@@ -96,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
96 97
97static void nfs_writedata_release(struct nfs_write_data *wdata) 98static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 99{
100 put_lseg(wdata->lseg);
99 put_nfs_open_context(wdata->args.context); 101 put_nfs_open_context(wdata->args.context);
100 nfs_writedata_free(wdata); 102 nfs_writedata_free(wdata);
101} 103}
@@ -781,25 +783,21 @@ static int flush_task_priority(int how)
781 return RPC_PRIORITY_NORMAL; 783 return RPC_PRIORITY_NORMAL;
782} 784}
783 785
784/* 786int nfs_initiate_write(struct nfs_write_data *data,
785 * Set up the argument/result storage required for the RPC call. 787 struct rpc_clnt *clnt,
786 */ 788 const struct rpc_call_ops *call_ops,
787static int nfs_write_rpcsetup(struct nfs_page *req, 789 int how)
788 struct nfs_write_data *data,
789 const struct rpc_call_ops *call_ops,
790 unsigned int count, unsigned int offset,
791 int how)
792{ 790{
793 struct inode *inode = req->wb_context->path.dentry->d_inode; 791 struct inode *inode = data->inode;
794 int priority = flush_task_priority(how); 792 int priority = flush_task_priority(how);
795 struct rpc_task *task; 793 struct rpc_task *task;
796 struct rpc_message msg = { 794 struct rpc_message msg = {
797 .rpc_argp = &data->args, 795 .rpc_argp = &data->args,
798 .rpc_resp = &data->res, 796 .rpc_resp = &data->res,
799 .rpc_cred = req->wb_context->cred, 797 .rpc_cred = data->cred,
800 }; 798 };
801 struct rpc_task_setup task_setup_data = { 799 struct rpc_task_setup task_setup_data = {
802 .rpc_client = NFS_CLIENT(inode), 800 .rpc_client = clnt,
803 .task = &data->task, 801 .task = &data->task,
804 .rpc_message = &msg, 802 .rpc_message = &msg,
805 .callback_ops = call_ops, 803 .callback_ops = call_ops,
@@ -810,12 +808,52 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
810 }; 808 };
811 int ret = 0; 809 int ret = 0;
812 810
811 /* Set up the initial task struct. */
812 NFS_PROTO(inode)->write_setup(data, &msg);
813
814 dprintk("NFS: %5u initiated write call "
815 "(req %s/%lld, %u bytes @ offset %llu)\n",
816 data->task.tk_pid,
817 inode->i_sb->s_id,
818 (long long)NFS_FILEID(inode),
819 data->args.count,
820 (unsigned long long)data->args.offset);
821
822 task = rpc_run_task(&task_setup_data);
823 if (IS_ERR(task)) {
824 ret = PTR_ERR(task);
825 goto out;
826 }
827 if (how & FLUSH_SYNC) {
828 ret = rpc_wait_for_completion_task(task);
829 if (ret == 0)
830 ret = task->tk_status;
831 }
832 rpc_put_task(task);
833out:
834 return ret;
835}
836EXPORT_SYMBOL_GPL(nfs_initiate_write);
837
838/*
839 * Set up the argument/result storage required for the RPC call.
840 */
841static int nfs_write_rpcsetup(struct nfs_page *req,
842 struct nfs_write_data *data,
843 const struct rpc_call_ops *call_ops,
844 unsigned int count, unsigned int offset,
845 struct pnfs_layout_segment *lseg,
846 int how)
847{
848 struct inode *inode = req->wb_context->path.dentry->d_inode;
849
813 /* Set up the RPC argument and reply structs 850 /* Set up the RPC argument and reply structs
814 * NB: take care not to mess about with data->commit et al. */ 851 * NB: take care not to mess about with data->commit et al. */
815 852
816 data->req = req; 853 data->req = req;
817 data->inode = inode = req->wb_context->path.dentry->d_inode; 854 data->inode = inode = req->wb_context->path.dentry->d_inode;
818 data->cred = msg.rpc_cred; 855 data->cred = req->wb_context->cred;
856 data->lseg = get_lseg(lseg);
819 857
820 data->args.fh = NFS_FH(inode); 858 data->args.fh = NFS_FH(inode);
821 data->args.offset = req_offset(req) + offset; 859 data->args.offset = req_offset(req) + offset;
@@ -836,30 +874,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
836 data->res.verf = &data->verf; 874 data->res.verf = &data->verf;
837 nfs_fattr_init(&data->fattr); 875 nfs_fattr_init(&data->fattr);
838 876
839 /* Set up the initial task struct. */ 877 if (data->lseg &&
840 NFS_PROTO(inode)->write_setup(data, &msg); 878 (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
841 879 return 0;
842 dprintk("NFS: %5u initiated write call "
843 "(req %s/%lld, %u bytes @ offset %llu)\n",
844 data->task.tk_pid,
845 inode->i_sb->s_id,
846 (long long)NFS_FILEID(inode),
847 count,
848 (unsigned long long)data->args.offset);
849 880
850 task = rpc_run_task(&task_setup_data); 881 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
851 if (IS_ERR(task)) {
852 ret = PTR_ERR(task);
853 goto out;
854 }
855 if (how & FLUSH_SYNC) {
856 ret = rpc_wait_for_completion_task(task);
857 if (ret == 0)
858 ret = task->tk_status;
859 }
860 rpc_put_task(task);
861out:
862 return ret;
863} 882}
864 883
865/* If a nfs_flush_* function fails, it should remove reqs from @head and 884/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -879,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req)
879 * Generate multiple small requests to write out a single 898 * Generate multiple small requests to write out a single
880 * contiguous dirty area on one page. 899 * contiguous dirty area on one page.
881 */ 900 */
882static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 901static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
883{ 902{
884 struct nfs_page *req = nfs_list_entry(head->next); 903 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
885 struct page *page = req->wb_page; 904 struct page *page = req->wb_page;
886 struct nfs_write_data *data; 905 struct nfs_write_data *data;
887 size_t wsize = NFS_SERVER(inode)->wsize, nbytes; 906 size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
888 unsigned int offset; 907 unsigned int offset;
889 int requests = 0; 908 int requests = 0;
890 int ret = 0; 909 int ret = 0;
910 struct pnfs_layout_segment *lseg;
891 LIST_HEAD(list); 911 LIST_HEAD(list);
892 912
893 nfs_list_remove_request(req); 913 nfs_list_remove_request(req);
894 914
895 nbytes = count; 915 nbytes = desc->pg_count;
896 do { 916 do {
897 size_t len = min(nbytes, wsize); 917 size_t len = min(nbytes, wsize);
898 918
@@ -905,9 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
905 } while (nbytes != 0); 925 } while (nbytes != 0);
906 atomic_set(&req->wb_complete, requests); 926 atomic_set(&req->wb_complete, requests);
907 927
928 BUG_ON(desc->pg_lseg);
929 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
908 ClearPageError(page); 930 ClearPageError(page);
909 offset = 0; 931 offset = 0;
910 nbytes = count; 932 nbytes = desc->pg_count;
911 do { 933 do {
912 int ret2; 934 int ret2;
913 935
@@ -919,13 +941,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
919 if (nbytes < wsize) 941 if (nbytes < wsize)
920 wsize = nbytes; 942 wsize = nbytes;
921 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 943 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
922 wsize, offset, how); 944 wsize, offset, lseg, desc->pg_ioflags);
923 if (ret == 0) 945 if (ret == 0)
924 ret = ret2; 946 ret = ret2;
925 offset += wsize; 947 offset += wsize;
926 nbytes -= wsize; 948 nbytes -= wsize;
927 } while (nbytes != 0); 949 } while (nbytes != 0);
928 950
951 put_lseg(lseg);
952 desc->pg_lseg = NULL;
929 return ret; 953 return ret;
930 954
931out_bad: 955out_bad:
@@ -946,16 +970,26 @@ out_bad:
946 * This is the case if nfs_updatepage detects a conflicting request 970 * This is the case if nfs_updatepage detects a conflicting request
947 * that has been written but not committed. 971 * that has been written but not committed.
948 */ 972 */
949static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 973static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
950{ 974{
951 struct nfs_page *req; 975 struct nfs_page *req;
952 struct page **pages; 976 struct page **pages;
953 struct nfs_write_data *data; 977 struct nfs_write_data *data;
978 struct list_head *head = &desc->pg_list;
979 struct pnfs_layout_segment *lseg = desc->pg_lseg;
980 int ret;
954 981
955 data = nfs_writedata_alloc(npages); 982 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
956 if (!data) 983 desc->pg_count));
957 goto out_bad; 984 if (!data) {
958 985 while (!list_empty(head)) {
986 req = nfs_list_entry(head->next);
987 nfs_list_remove_request(req);
988 nfs_redirty_request(req);
989 }
990 ret = -ENOMEM;
991 goto out;
992 }
959 pages = data->pagevec; 993 pages = data->pagevec;
960 while (!list_empty(head)) { 994 while (!list_empty(head)) {
961 req = nfs_list_entry(head->next); 995 req = nfs_list_entry(head->next);
@@ -965,16 +999,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
965 *pages++ = req->wb_page; 999 *pages++ = req->wb_page;
966 } 1000 }
967 req = nfs_list_entry(data->pages.next); 1001 req = nfs_list_entry(data->pages.next);
1002 if ((!lseg) && list_is_singular(&data->pages))
1003 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
968 1004
969 /* Set up the argument struct */ 1005 /* Set up the argument struct */
970 return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); 1006 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
971 out_bad: 1007out:
972 while (!list_empty(head)) { 1008 put_lseg(lseg); /* Cleans any gotten in ->pg_test */
973 req = nfs_list_entry(head->next); 1009 desc->pg_lseg = NULL;
974 nfs_list_remove_request(req); 1010 return ret;
975 nfs_redirty_request(req);
976 }
977 return -ENOMEM;
978} 1011}
979 1012
980static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1013static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -982,6 +1015,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
982{ 1015{
983 size_t wsize = NFS_SERVER(inode)->wsize; 1016 size_t wsize = NFS_SERVER(inode)->wsize;
984 1017
1018 pnfs_pageio_init_write(pgio, inode);
1019
985 if (wsize < PAGE_CACHE_SIZE) 1020 if (wsize < PAGE_CACHE_SIZE)
986 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1021 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
987 else 1022 else
@@ -1132,7 +1167,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
1132/* 1167/*
1133 * This function is called when the WRITE call is complete. 1168 * This function is called when the WRITE call is complete.
1134 */ 1169 */
1135int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) 1170void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1136{ 1171{
1137 struct nfs_writeargs *argp = &data->args; 1172 struct nfs_writeargs *argp = &data->args;
1138 struct nfs_writeres *resp = &data->res; 1173 struct nfs_writeres *resp = &data->res;
@@ -1151,7 +1186,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1151 */ 1186 */
1152 status = NFS_PROTO(data->inode)->write_done(task, data); 1187 status = NFS_PROTO(data->inode)->write_done(task, data);
1153 if (status != 0) 1188 if (status != 0)
1154 return status; 1189 return;
1155 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1190 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1156 1191
1157#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1192#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -1166,6 +1201,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1166 */ 1201 */
1167 static unsigned long complain; 1202 static unsigned long complain;
1168 1203
1204 /* Note this will print the MDS for a DS write */
1169 if (time_before(complain, jiffies)) { 1205 if (time_before(complain, jiffies)) {
1170 dprintk("NFS: faulty NFS server %s:" 1206 dprintk("NFS: faulty NFS server %s:"
1171 " (committed = %d) != (stable = %d)\n", 1207 " (committed = %d) != (stable = %d)\n",
@@ -1186,6 +1222,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1186 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1222 /* Was this an NFSv2 write or an NFSv3 stable write? */
1187 if (resp->verf->committed != NFS_UNSTABLE) { 1223 if (resp->verf->committed != NFS_UNSTABLE) {
1188 /* Resend from where the server left off */ 1224 /* Resend from where the server left off */
1225 data->mds_offset += resp->count;
1189 argp->offset += resp->count; 1226 argp->offset += resp->count;
1190 argp->pgbase += resp->count; 1227 argp->pgbase += resp->count;
1191 argp->count -= resp->count; 1228 argp->count -= resp->count;
@@ -1196,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1196 argp->stable = NFS_FILE_SYNC; 1233 argp->stable = NFS_FILE_SYNC;
1197 } 1234 }
1198 nfs_restart_rpc(task, server->nfs_client); 1235 nfs_restart_rpc(task, server->nfs_client);
1199 return -EAGAIN; 1236 return;
1200 } 1237 }
1201 if (time_before(complain, jiffies)) { 1238 if (time_before(complain, jiffies)) {
1202 printk(KERN_WARNING 1239 printk(KERN_WARNING
@@ -1207,7 +1244,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1207 /* Can't do anything about it except throw an error. */ 1244 /* Can't do anything about it except throw an error. */
1208 task->tk_status = -EIO; 1245 task->tk_status = -EIO;
1209 } 1246 }
1210 return 0; 1247 return;
1211} 1248}
1212 1249
1213 1250
@@ -1292,6 +1329,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1292 task = rpc_run_task(&task_setup_data); 1329 task = rpc_run_task(&task_setup_data);
1293 if (IS_ERR(task)) 1330 if (IS_ERR(task))
1294 return PTR_ERR(task); 1331 return PTR_ERR(task);
1332 if (how & FLUSH_SYNC)
1333 rpc_wait_for_completion_task(task);
1295 rpc_put_task(task); 1334 rpc_put_task(task);
1296 return 0; 1335 return 0;
1297} 1336}
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index bf9cbd242ddd..124e8fcb0dd6 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -22,30 +22,17 @@
22 22
23static struct file *do_open(char *name, int flags) 23static struct file *do_open(char *name, int flags)
24{ 24{
25 struct nameidata nd;
26 struct vfsmount *mnt; 25 struct vfsmount *mnt;
27 int error; 26 struct file *file;
28 27
29 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); 28 mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
30 if (IS_ERR(mnt)) 29 if (IS_ERR(mnt))
31 return (struct file *)mnt; 30 return (struct file *)mnt;
32 31
33 error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); 32 file = file_open_root(mnt->mnt_root, mnt, name, flags);
34 mntput(mnt); /* drop do_kern_mount reference */
35 if (error)
36 return ERR_PTR(error);
37
38 if (flags == O_RDWR)
39 error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags);
40 else
41 error = may_open(&nd.path, MAY_WRITE, flags);
42 33
43 if (!error) 34 mntput(mnt); /* drop do_kern_mount reference */
44 return dentry_open(nd.path.dentry, nd.path.mnt, flags, 35 return file;
45 current_cred());
46
47 path_put(&nd.path);
48 return ERR_PTR(error);
49} 36}
50 37
51static struct { 38static struct {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9cc626b70fb6..2e1cebde90df 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -87,7 +87,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
87 .dentry = dget(dentry)}; 87 .dentry = dget(dentry)};
88 int err = 0; 88 int err = 0;
89 89
90 err = follow_down(&path, false); 90 err = follow_down(&path);
91 if (err < 0) 91 if (err < 0)
92 goto out; 92 goto out;
93 93
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index d7fd696e595c..0a0a66d98cce 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -521,8 +521,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
521 group_offset, bitmap)) 521 group_offset, bitmap))
522 printk(KERN_WARNING "%s: entry number %llu already freed\n", 522 printk(KERN_WARNING "%s: entry number %llu already freed\n",
523 __func__, (unsigned long long)req->pr_entry_nr); 523 __func__, (unsigned long long)req->pr_entry_nr);
524 524 else
525 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 525 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
526 526
527 kunmap(req->pr_bitmap_bh->b_page); 527 kunmap(req->pr_bitmap_bh->b_page);
528 kunmap(req->pr_desc_bh->b_page); 528 kunmap(req->pr_desc_bh->b_page);
@@ -558,8 +558,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
558 group_offset, bitmap)) 558 group_offset, bitmap))
559 printk(KERN_WARNING "%s: entry number %llu already freed\n", 559 printk(KERN_WARNING "%s: entry number %llu already freed\n",
560 __func__, (unsigned long long)req->pr_entry_nr); 560 __func__, (unsigned long long)req->pr_entry_nr);
561 561 else
562 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); 562 nilfs_palloc_group_desc_add_entries(inode, group, desc, 1);
563 563
564 kunmap(req->pr_bitmap_bh->b_page); 564 kunmap(req->pr_bitmap_bh->b_page);
565 kunmap(req->pr_desc_bh->b_page); 565 kunmap(req->pr_desc_bh->b_page);
@@ -665,7 +665,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
665 for (j = i, n = 0; 665 for (j = i, n = 0;
666 (j < nitems) && nilfs_palloc_group_is_in(inode, group, 666 (j < nitems) && nilfs_palloc_group_is_in(inode, group,
667 entry_nrs[j]); 667 entry_nrs[j]);
668 j++, n++) { 668 j++) {
669 nilfs_palloc_group(inode, entry_nrs[j], &group_offset); 669 nilfs_palloc_group(inode, entry_nrs[j], &group_offset);
670 if (!nilfs_clear_bit_atomic( 670 if (!nilfs_clear_bit_atomic(
671 nilfs_mdt_bgl_lock(inode, group), 671 nilfs_mdt_bgl_lock(inode, group),
@@ -674,6 +674,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
674 "%s: entry number %llu already freed\n", 674 "%s: entry number %llu already freed\n",
675 __func__, 675 __func__,
676 (unsigned long long)entry_nrs[j]); 676 (unsigned long long)entry_nrs[j]);
677 } else {
678 n++;
677 } 679 }
678 } 680 }
679 nilfs_palloc_group_desc_add_entries(inode, group, desc, n); 681 nilfs_palloc_group_desc_add_entries(inode, group, desc, n);
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 9af34a7e6e13..f5fde36b9e28 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -74,7 +74,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
74 74
75#define nilfs_set_bit_atomic ext2_set_bit_atomic 75#define nilfs_set_bit_atomic ext2_set_bit_atomic
76#define nilfs_clear_bit_atomic ext2_clear_bit_atomic 76#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
77#define nilfs_find_next_zero_bit ext2_find_next_zero_bit 77#define nilfs_find_next_zero_bit find_next_zero_bit_le
78 78
79/* 79/*
80 * persistent object allocator cache 80 * persistent object allocator cache
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 3ee67c67cc52..4723f04e9b12 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -25,7 +25,6 @@
25#include <linux/errno.h> 25#include <linux/errno.h>
26#include "nilfs.h" 26#include "nilfs.h"
27#include "bmap.h" 27#include "bmap.h"
28#include "sb.h"
29#include "btree.h" 28#include "btree.h"
30#include "direct.h" 29#include "direct.h"
31#include "btnode.h" 30#include "btnode.h"
@@ -425,17 +424,6 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap)
425/* 424/*
426 * Internal use only 425 * Internal use only
427 */ 426 */
428
429void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n)
430{
431 inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
432}
433
434void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
435{
436 inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n);
437}
438
439__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, 427__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
440 const struct buffer_head *bh) 428 const struct buffer_head *bh)
441{ 429{
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index bde1c0aa2e15..40d9f453d31c 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -240,9 +240,6 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
240__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); 240__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
241__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); 241__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
242 242
243void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
244void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
245
246 243
247/* Assume that bmap semaphore is locked. */ 244/* Assume that bmap semaphore is locked. */
248static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap) 245static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 300c2bc00c3f..d451ae0e0bf3 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1174,7 +1174,7 @@ static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1174 if (ret < 0) 1174 if (ret < 0)
1175 goto out; 1175 goto out;
1176 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1176 nilfs_btree_commit_insert(btree, path, level, key, ptr);
1177 nilfs_bmap_add_blocks(btree, stats.bs_nblocks); 1177 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1178 1178
1179 out: 1179 out:
1180 nilfs_btree_free_path(path); 1180 nilfs_btree_free_path(path);
@@ -1511,7 +1511,7 @@ static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1511 if (ret < 0) 1511 if (ret < 0)
1512 goto out; 1512 goto out;
1513 nilfs_btree_commit_delete(btree, path, level, dat); 1513 nilfs_btree_commit_delete(btree, path, level, dat);
1514 nilfs_bmap_sub_blocks(btree, stats.bs_nblocks); 1514 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1515 1515
1516out: 1516out:
1517 nilfs_btree_free_path(path); 1517 nilfs_btree_free_path(path);
@@ -1776,7 +1776,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1776 return ret; 1776 return ret;
1777 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1777 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1778 di, ni, bh); 1778 di, ni, bh);
1779 nilfs_bmap_add_blocks(btree, stats.bs_nblocks); 1779 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1780 return 0; 1780 return 0;
1781} 1781}
1782 1782
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 9d45773b79e6..3a1923943b14 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -440,7 +440,6 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
440 nilfs_commit_chunk(page, mapping, from, to); 440 nilfs_commit_chunk(page, mapping, from, to);
441 nilfs_put_page(page); 441 nilfs_put_page(page);
442 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 442 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
443/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
444} 443}
445 444
446/* 445/*
@@ -531,7 +530,6 @@ got_it:
531 nilfs_set_de_type(de, inode); 530 nilfs_set_de_type(de, inode);
532 nilfs_commit_chunk(page, page->mapping, from, to); 531 nilfs_commit_chunk(page, page->mapping, from, to);
533 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 532 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
534/* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */
535 nilfs_mark_inode_dirty(dir); 533 nilfs_mark_inode_dirty(dir);
536 /* OFFSET_CACHE */ 534 /* OFFSET_CACHE */
537out_put: 535out_put:
@@ -579,7 +577,6 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
579 dir->inode = 0; 577 dir->inode = 0;
580 nilfs_commit_chunk(page, mapping, from, to); 578 nilfs_commit_chunk(page, mapping, from, to);
581 inode->i_ctime = inode->i_mtime = CURRENT_TIME; 579 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
582/* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */
583out: 580out:
584 nilfs_put_page(page); 581 nilfs_put_page(page);
585 return err; 582 return err;
@@ -684,7 +681,7 @@ const struct file_operations nilfs_dir_operations = {
684 .readdir = nilfs_readdir, 681 .readdir = nilfs_readdir,
685 .unlocked_ioctl = nilfs_ioctl, 682 .unlocked_ioctl = nilfs_ioctl,
686#ifdef CONFIG_COMPAT 683#ifdef CONFIG_COMPAT
687 .compat_ioctl = nilfs_ioctl, 684 .compat_ioctl = nilfs_compat_ioctl,
688#endif /* CONFIG_COMPAT */ 685#endif /* CONFIG_COMPAT */
689 .fsync = nilfs_sync_file, 686 .fsync = nilfs_sync_file,
690 687
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 324d80c57518..82f4865e86dd 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -146,7 +146,7 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
146 if (NILFS_BMAP_USE_VBN(bmap)) 146 if (NILFS_BMAP_USE_VBN(bmap))
147 nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); 147 nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr);
148 148
149 nilfs_bmap_add_blocks(bmap, 1); 149 nilfs_inode_add_blocks(bmap->b_inode, 1);
150 } 150 }
151 return ret; 151 return ret;
152} 152}
@@ -168,7 +168,7 @@ static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
168 if (!ret) { 168 if (!ret) {
169 nilfs_bmap_commit_end_ptr(bmap, &req, dat); 169 nilfs_bmap_commit_end_ptr(bmap, &req, dat);
170 nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); 170 nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR);
171 nilfs_bmap_sub_blocks(bmap, 1); 171 nilfs_inode_sub_blocks(bmap->b_inode, 1);
172 } 172 }
173 return ret; 173 return ret;
174} 174}
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 2f560c9fb808..93589fccdd97 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -59,7 +59,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
59 struct nilfs_transaction_info ti; 59 struct nilfs_transaction_info ti;
60 int ret; 60 int ret;
61 61
62 if (unlikely(nilfs_near_disk_full(NILFS_SB(inode->i_sb)->s_nilfs))) 62 if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
63 return VM_FAULT_SIGBUS; /* -ENOSPC */ 63 return VM_FAULT_SIGBUS; /* -ENOSPC */
64 64
65 lock_page(page); 65 lock_page(page);
@@ -142,7 +142,7 @@ const struct file_operations nilfs_file_operations = {
142 .aio_write = generic_file_aio_write, 142 .aio_write = generic_file_aio_write,
143 .unlocked_ioctl = nilfs_ioctl, 143 .unlocked_ioctl = nilfs_ioctl,
144#ifdef CONFIG_COMPAT 144#ifdef CONFIG_COMPAT
145 .compat_ioctl = nilfs_ioctl, 145 .compat_ioctl = nilfs_compat_ioctl,
146#endif /* CONFIG_COMPAT */ 146#endif /* CONFIG_COMPAT */
147 .mmap = nilfs_file_mmap, 147 .mmap = nilfs_file_mmap,
148 .open = generic_file_open, 148 .open = generic_file_open,
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 2fd440d8d6b8..d5625be236a8 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -41,6 +41,24 @@ struct nilfs_iget_args {
41 int for_gc; 41 int for_gc;
42}; 42};
43 43
44void nilfs_inode_add_blocks(struct inode *inode, int n)
45{
46 struct nilfs_root *root = NILFS_I(inode)->i_root;
47
48 inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
49 if (root)
50 atomic_add(n, &root->blocks_count);
51}
52
53void nilfs_inode_sub_blocks(struct inode *inode, int n)
54{
55 struct nilfs_root *root = NILFS_I(inode)->i_root;
56
57 inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
58 if (root)
59 atomic_sub(n, &root->blocks_count);
60}
61
44/** 62/**
45 * nilfs_get_block() - get a file block on the filesystem (callback function) 63 * nilfs_get_block() - get a file block on the filesystem (callback function)
46 * @inode - inode struct of the target file 64 * @inode - inode struct of the target file
@@ -277,7 +295,7 @@ const struct address_space_operations nilfs_aops = {
277struct inode *nilfs_new_inode(struct inode *dir, int mode) 295struct inode *nilfs_new_inode(struct inode *dir, int mode)
278{ 296{
279 struct super_block *sb = dir->i_sb; 297 struct super_block *sb = dir->i_sb;
280 struct nilfs_sb_info *sbi = NILFS_SB(sb); 298 struct the_nilfs *nilfs = sb->s_fs_info;
281 struct inode *inode; 299 struct inode *inode;
282 struct nilfs_inode_info *ii; 300 struct nilfs_inode_info *ii;
283 struct nilfs_root *root; 301 struct nilfs_root *root;
@@ -315,19 +333,16 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode)
315 /* No lock is needed; iget() ensures it. */ 333 /* No lock is needed; iget() ensures it. */
316 } 334 }
317 335
318 ii->i_flags = NILFS_I(dir)->i_flags; 336 ii->i_flags = nilfs_mask_flags(
319 if (S_ISLNK(mode)) 337 mode, NILFS_I(dir)->i_flags & NILFS_FL_INHERITED);
320 ii->i_flags &= ~(NILFS_IMMUTABLE_FL | NILFS_APPEND_FL);
321 if (!S_ISDIR(mode))
322 ii->i_flags &= ~NILFS_DIRSYNC_FL;
323 338
324 /* ii->i_file_acl = 0; */ 339 /* ii->i_file_acl = 0; */
325 /* ii->i_dir_acl = 0; */ 340 /* ii->i_dir_acl = 0; */
326 ii->i_dir_start_lookup = 0; 341 ii->i_dir_start_lookup = 0;
327 nilfs_set_inode_flags(inode); 342 nilfs_set_inode_flags(inode);
328 spin_lock(&sbi->s_next_gen_lock); 343 spin_lock(&nilfs->ns_next_gen_lock);
329 inode->i_generation = sbi->s_next_generation++; 344 inode->i_generation = nilfs->ns_next_generation++;
330 spin_unlock(&sbi->s_next_gen_lock); 345 spin_unlock(&nilfs->ns_next_gen_lock);
331 insert_inode_hash(inode); 346 insert_inode_hash(inode);
332 347
333 err = nilfs_init_acl(inode, dir); 348 err = nilfs_init_acl(inode, dir);
@@ -359,17 +374,15 @@ void nilfs_set_inode_flags(struct inode *inode)
359 374
360 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | 375 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME |
361 S_DIRSYNC); 376 S_DIRSYNC);
362 if (flags & NILFS_SYNC_FL) 377 if (flags & FS_SYNC_FL)
363 inode->i_flags |= S_SYNC; 378 inode->i_flags |= S_SYNC;
364 if (flags & NILFS_APPEND_FL) 379 if (flags & FS_APPEND_FL)
365 inode->i_flags |= S_APPEND; 380 inode->i_flags |= S_APPEND;
366 if (flags & NILFS_IMMUTABLE_FL) 381 if (flags & FS_IMMUTABLE_FL)
367 inode->i_flags |= S_IMMUTABLE; 382 inode->i_flags |= S_IMMUTABLE;
368#ifndef NILFS_ATIME_DISABLE 383 if (flags & FS_NOATIME_FL)
369 if (flags & NILFS_NOATIME_FL)
370#endif
371 inode->i_flags |= S_NOATIME; 384 inode->i_flags |= S_NOATIME;
372 if (flags & NILFS_DIRSYNC_FL) 385 if (flags & FS_DIRSYNC_FL)
373 inode->i_flags |= S_DIRSYNC; 386 inode->i_flags |= S_DIRSYNC;
374 mapping_set_gfp_mask(inode->i_mapping, 387 mapping_set_gfp_mask(inode->i_mapping,
375 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); 388 mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
@@ -420,7 +433,7 @@ static int __nilfs_read_inode(struct super_block *sb,
420 struct nilfs_root *root, unsigned long ino, 433 struct nilfs_root *root, unsigned long ino,
421 struct inode *inode) 434 struct inode *inode)
422{ 435{
423 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; 436 struct the_nilfs *nilfs = sb->s_fs_info;
424 struct buffer_head *bh; 437 struct buffer_head *bh;
425 struct nilfs_inode *raw_inode; 438 struct nilfs_inode *raw_inode;
426 int err; 439 int err;
@@ -707,6 +720,7 @@ void nilfs_evict_inode(struct inode *inode)
707 struct nilfs_transaction_info ti; 720 struct nilfs_transaction_info ti;
708 struct super_block *sb = inode->i_sb; 721 struct super_block *sb = inode->i_sb;
709 struct nilfs_inode_info *ii = NILFS_I(inode); 722 struct nilfs_inode_info *ii = NILFS_I(inode);
723 int ret;
710 724
711 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { 725 if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) {
712 if (inode->i_data.nrpages) 726 if (inode->i_data.nrpages)
@@ -725,8 +739,9 @@ void nilfs_evict_inode(struct inode *inode)
725 nilfs_mark_inode_dirty(inode); 739 nilfs_mark_inode_dirty(inode);
726 end_writeback(inode); 740 end_writeback(inode);
727 741
728 nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); 742 ret = nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino);
729 atomic_dec(&ii->i_root->inodes_count); 743 if (!ret)
744 atomic_dec(&ii->i_root->inodes_count);
730 745
731 nilfs_clear_inode(inode); 746 nilfs_clear_inode(inode);
732 747
@@ -792,18 +807,18 @@ int nilfs_permission(struct inode *inode, int mask, unsigned int flags)
792 807
793int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) 808int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
794{ 809{
795 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); 810 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
796 struct nilfs_inode_info *ii = NILFS_I(inode); 811 struct nilfs_inode_info *ii = NILFS_I(inode);
797 int err; 812 int err;
798 813
799 spin_lock(&sbi->s_inode_lock); 814 spin_lock(&nilfs->ns_inode_lock);
800 if (ii->i_bh == NULL) { 815 if (ii->i_bh == NULL) {
801 spin_unlock(&sbi->s_inode_lock); 816 spin_unlock(&nilfs->ns_inode_lock);
802 err = nilfs_ifile_get_inode_block(ii->i_root->ifile, 817 err = nilfs_ifile_get_inode_block(ii->i_root->ifile,
803 inode->i_ino, pbh); 818 inode->i_ino, pbh);
804 if (unlikely(err)) 819 if (unlikely(err))
805 return err; 820 return err;
806 spin_lock(&sbi->s_inode_lock); 821 spin_lock(&nilfs->ns_inode_lock);
807 if (ii->i_bh == NULL) 822 if (ii->i_bh == NULL)
808 ii->i_bh = *pbh; 823 ii->i_bh = *pbh;
809 else { 824 else {
@@ -814,36 +829,36 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
814 *pbh = ii->i_bh; 829 *pbh = ii->i_bh;
815 830
816 get_bh(*pbh); 831 get_bh(*pbh);
817 spin_unlock(&sbi->s_inode_lock); 832 spin_unlock(&nilfs->ns_inode_lock);
818 return 0; 833 return 0;
819} 834}
820 835
821int nilfs_inode_dirty(struct inode *inode) 836int nilfs_inode_dirty(struct inode *inode)
822{ 837{
823 struct nilfs_inode_info *ii = NILFS_I(inode); 838 struct nilfs_inode_info *ii = NILFS_I(inode);
824 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb); 839 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
825 int ret = 0; 840 int ret = 0;
826 841
827 if (!list_empty(&ii->i_dirty)) { 842 if (!list_empty(&ii->i_dirty)) {
828 spin_lock(&sbi->s_inode_lock); 843 spin_lock(&nilfs->ns_inode_lock);
829 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) || 844 ret = test_bit(NILFS_I_DIRTY, &ii->i_state) ||
830 test_bit(NILFS_I_BUSY, &ii->i_state); 845 test_bit(NILFS_I_BUSY, &ii->i_state);
831 spin_unlock(&sbi->s_inode_lock); 846 spin_unlock(&nilfs->ns_inode_lock);
832 } 847 }
833 return ret; 848 return ret;
834} 849}
835 850
836int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty) 851int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
837{ 852{
838 struct nilfs_sb_info *sbi = NILFS_SB(inode->i_sb);
839 struct nilfs_inode_info *ii = NILFS_I(inode); 853 struct nilfs_inode_info *ii = NILFS_I(inode);
854 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
840 855
841 atomic_add(nr_dirty, &sbi->s_nilfs->ns_ndirtyblks); 856 atomic_add(nr_dirty, &nilfs->ns_ndirtyblks);
842 857
843 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state)) 858 if (test_and_set_bit(NILFS_I_DIRTY, &ii->i_state))
844 return 0; 859 return 0;
845 860
846 spin_lock(&sbi->s_inode_lock); 861 spin_lock(&nilfs->ns_inode_lock);
847 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 862 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
848 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 863 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
849 /* Because this routine may race with nilfs_dispose_list(), 864 /* Because this routine may race with nilfs_dispose_list(),
@@ -851,18 +866,18 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty)
851 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) { 866 if (list_empty(&ii->i_dirty) && igrab(inode) == NULL) {
852 /* This will happen when somebody is freeing 867 /* This will happen when somebody is freeing
853 this inode. */ 868 this inode. */
854 nilfs_warning(sbi->s_super, __func__, 869 nilfs_warning(inode->i_sb, __func__,
855 "cannot get inode (ino=%lu)\n", 870 "cannot get inode (ino=%lu)\n",
856 inode->i_ino); 871 inode->i_ino);
857 spin_unlock(&sbi->s_inode_lock); 872 spin_unlock(&nilfs->ns_inode_lock);
858 return -EINVAL; /* NILFS_I_DIRTY may remain for 873 return -EINVAL; /* NILFS_I_DIRTY may remain for
859 freeing inode */ 874 freeing inode */
860 } 875 }
861 list_del(&ii->i_dirty); 876 list_del(&ii->i_dirty);
862 list_add_tail(&ii->i_dirty, &sbi->s_dirty_files); 877 list_add_tail(&ii->i_dirty, &nilfs->ns_dirty_files);
863 set_bit(NILFS_I_QUEUED, &ii->i_state); 878 set_bit(NILFS_I_QUEUED, &ii->i_state);
864 } 879 }
865 spin_unlock(&sbi->s_inode_lock); 880 spin_unlock(&nilfs->ns_inode_lock);
866 return 0; 881 return 0;
867} 882}
868 883
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 496738963fdb..f2469ba6246b 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -26,7 +26,9 @@
26#include <linux/capability.h> /* capable() */ 26#include <linux/capability.h> /* capable() */
27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ 27#include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */
28#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
29#include <linux/compat.h> /* compat_ptr() */
29#include <linux/mount.h> /* mnt_want_write(), mnt_drop_write() */ 30#include <linux/mount.h> /* mnt_want_write(), mnt_drop_write() */
31#include <linux/buffer_head.h>
30#include <linux/nilfs2_fs.h> 32#include <linux/nilfs2_fs.h>
31#include "nilfs.h" 33#include "nilfs.h"
32#include "segment.h" 34#include "segment.h"
@@ -97,11 +99,74 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
97 return ret; 99 return ret;
98} 100}
99 101
102static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
103{
104 unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE;
105
106 return put_user(flags, (int __user *)argp);
107}
108
109static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
110 void __user *argp)
111{
112 struct nilfs_transaction_info ti;
113 unsigned int flags, oldflags;
114 int ret;
115
116 if (!inode_owner_or_capable(inode))
117 return -EACCES;
118
119 if (get_user(flags, (int __user *)argp))
120 return -EFAULT;
121
122 ret = mnt_want_write(filp->f_path.mnt);
123 if (ret)
124 return ret;
125
126 flags = nilfs_mask_flags(inode->i_mode, flags);
127
128 mutex_lock(&inode->i_mutex);
129
130 oldflags = NILFS_I(inode)->i_flags;
131
132 /*
133 * The IMMUTABLE and APPEND_ONLY flags can only be changed by the
134 * relevant capability.
135 */
136 ret = -EPERM;
137 if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) &&
138 !capable(CAP_LINUX_IMMUTABLE))
139 goto out;
140
141 ret = nilfs_transaction_begin(inode->i_sb, &ti, 0);
142 if (ret)
143 goto out;
144
145 NILFS_I(inode)->i_flags = (oldflags & ~FS_FL_USER_MODIFIABLE) |
146 (flags & FS_FL_USER_MODIFIABLE);
147
148 nilfs_set_inode_flags(inode);
149 inode->i_ctime = CURRENT_TIME;
150 if (IS_SYNC(inode))
151 nilfs_set_transaction_flag(NILFS_TI_SYNC);
152
153 nilfs_mark_inode_dirty(inode);
154 ret = nilfs_transaction_commit(inode->i_sb);
155out:
156 mutex_unlock(&inode->i_mutex);
157 mnt_drop_write(filp->f_path.mnt);
158 return ret;
159}
160
161static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
162{
163 return put_user(inode->i_generation, (int __user *)argp);
164}
165
100static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, 166static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
101 unsigned int cmd, void __user *argp) 167 unsigned int cmd, void __user *argp)
102{ 168{
103 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 169 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
104 struct inode *cpfile = nilfs->ns_cpfile;
105 struct nilfs_transaction_info ti; 170 struct nilfs_transaction_info ti;
106 struct nilfs_cpmode cpmode; 171 struct nilfs_cpmode cpmode;
107 int ret; 172 int ret;
@@ -121,7 +186,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
121 186
122 nilfs_transaction_begin(inode->i_sb, &ti, 0); 187 nilfs_transaction_begin(inode->i_sb, &ti, 0);
123 ret = nilfs_cpfile_change_cpmode( 188 ret = nilfs_cpfile_change_cpmode(
124 cpfile, cpmode.cm_cno, cpmode.cm_mode); 189 nilfs->ns_cpfile, cpmode.cm_cno, cpmode.cm_mode);
125 if (unlikely(ret < 0)) 190 if (unlikely(ret < 0))
126 nilfs_transaction_abort(inode->i_sb); 191 nilfs_transaction_abort(inode->i_sb);
127 else 192 else
@@ -137,7 +202,7 @@ static int
137nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp, 202nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
138 unsigned int cmd, void __user *argp) 203 unsigned int cmd, void __user *argp)
139{ 204{
140 struct inode *cpfile = NILFS_SB(inode->i_sb)->s_nilfs->ns_cpfile; 205 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
141 struct nilfs_transaction_info ti; 206 struct nilfs_transaction_info ti;
142 __u64 cno; 207 __u64 cno;
143 int ret; 208 int ret;
@@ -154,7 +219,7 @@ nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
154 goto out; 219 goto out;
155 220
156 nilfs_transaction_begin(inode->i_sb, &ti, 0); 221 nilfs_transaction_begin(inode->i_sb, &ti, 0);
157 ret = nilfs_cpfile_delete_checkpoint(cpfile, cno); 222 ret = nilfs_cpfile_delete_checkpoint(nilfs->ns_cpfile, cno);
158 if (unlikely(ret < 0)) 223 if (unlikely(ret < 0))
159 nilfs_transaction_abort(inode->i_sb); 224 nilfs_transaction_abort(inode->i_sb);
160 else 225 else
@@ -180,7 +245,7 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
180static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp, 245static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
181 unsigned int cmd, void __user *argp) 246 unsigned int cmd, void __user *argp)
182{ 247{
183 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 248 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
184 struct nilfs_cpstat cpstat; 249 struct nilfs_cpstat cpstat;
185 int ret; 250 int ret;
186 251
@@ -211,7 +276,7 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
211static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp, 276static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
212 unsigned int cmd, void __user *argp) 277 unsigned int cmd, void __user *argp)
213{ 278{
214 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 279 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
215 struct nilfs_sustat sustat; 280 struct nilfs_sustat sustat;
216 int ret; 281 int ret;
217 282
@@ -267,7 +332,7 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
267static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp, 332static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
268 unsigned int cmd, void __user *argp) 333 unsigned int cmd, void __user *argp)
269{ 334{
270 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 335 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
271 struct nilfs_argv argv; 336 struct nilfs_argv argv;
272 int ret; 337 int ret;
273 338
@@ -336,7 +401,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
336 struct nilfs_argv *argv, void *buf) 401 struct nilfs_argv *argv, void *buf)
337{ 402{
338 size_t nmembs = argv->v_nmembs; 403 size_t nmembs = argv->v_nmembs;
339 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; 404 struct the_nilfs *nilfs = sb->s_fs_info;
340 struct inode *inode; 405 struct inode *inode;
341 struct nilfs_vdesc *vdesc; 406 struct nilfs_vdesc *vdesc;
342 struct buffer_head *bh, *n; 407 struct buffer_head *bh, *n;
@@ -550,7 +615,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
550 ret = PTR_ERR(kbufs[4]); 615 ret = PTR_ERR(kbufs[4]);
551 goto out; 616 goto out;
552 } 617 }
553 nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 618 nilfs = inode->i_sb->s_fs_info;
554 619
555 for (n = 0; n < 4; n++) { 620 for (n = 0; n < 4; n++) {
556 ret = -EINVAL; 621 ret = -EINVAL;
@@ -623,7 +688,7 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
623 return ret; 688 return ret;
624 689
625 if (argp != NULL) { 690 if (argp != NULL) {
626 nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 691 nilfs = inode->i_sb->s_fs_info;
627 down_read(&nilfs->ns_segctor_sem); 692 down_read(&nilfs->ns_segctor_sem);
628 cno = nilfs->ns_cno - 1; 693 cno = nilfs->ns_cno - 1;
629 up_read(&nilfs->ns_segctor_sem); 694 up_read(&nilfs->ns_segctor_sem);
@@ -641,7 +706,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
641 void *, size_t, size_t)) 706 void *, size_t, size_t))
642 707
643{ 708{
644 struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; 709 struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
645 struct nilfs_argv argv; 710 struct nilfs_argv argv;
646 int ret; 711 int ret;
647 712
@@ -666,6 +731,12 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
666 void __user *argp = (void __user *)arg; 731 void __user *argp = (void __user *)arg;
667 732
668 switch (cmd) { 733 switch (cmd) {
734 case FS_IOC_GETFLAGS:
735 return nilfs_ioctl_getflags(inode, argp);
736 case FS_IOC_SETFLAGS:
737 return nilfs_ioctl_setflags(inode, filp, argp);
738 case FS_IOC_GETVERSION:
739 return nilfs_ioctl_getversion(inode, argp);
669 case NILFS_IOCTL_CHANGE_CPMODE: 740 case NILFS_IOCTL_CHANGE_CPMODE:
670 return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp); 741 return nilfs_ioctl_change_cpmode(inode, filp, cmd, argp);
671 case NILFS_IOCTL_DELETE_CHECKPOINT: 742 case NILFS_IOCTL_DELETE_CHECKPOINT:
@@ -696,3 +767,23 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
696 return -ENOTTY; 767 return -ENOTTY;
697 } 768 }
698} 769}
770
771#ifdef CONFIG_COMPAT
772long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
773{
774 switch (cmd) {
775 case FS_IOC32_GETFLAGS:
776 cmd = FS_IOC_GETFLAGS;
777 break;
778 case FS_IOC32_SETFLAGS:
779 cmd = FS_IOC_SETFLAGS;
780 break;
781 case FS_IOC32_GETVERSION:
782 cmd = FS_IOC_GETVERSION;
783 break;
784 default:
785 return -ENOIOCTLCMD;
786 }
787 return nilfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg));
788}
789#endif
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index b13734bf3521..ed68563ec708 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -66,7 +66,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode)
66 66
67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) 67static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode)
68{ 68{
69 return NILFS_SB(inode->i_sb)->s_nilfs; 69 return inode->i_sb->s_fs_info;
70} 70}
71 71
72/* Default GFP flags using highmem */ 72/* Default GFP flags using highmem */
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 98034271cd02..546849b3e88f 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -397,7 +397,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page); 397 new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_page);
398 if (!new_de) 398 if (!new_de)
399 goto out_dir; 399 goto out_dir;
400 inc_nlink(old_inode);
401 nilfs_set_link(new_dir, new_de, new_page, old_inode); 400 nilfs_set_link(new_dir, new_de, new_page, old_inode);
402 nilfs_mark_inode_dirty(new_dir); 401 nilfs_mark_inode_dirty(new_dir);
403 new_inode->i_ctime = CURRENT_TIME; 402 new_inode->i_ctime = CURRENT_TIME;
@@ -411,13 +410,9 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
411 if (new_dir->i_nlink >= NILFS_LINK_MAX) 410 if (new_dir->i_nlink >= NILFS_LINK_MAX)
412 goto out_dir; 411 goto out_dir;
413 } 412 }
414 inc_nlink(old_inode);
415 err = nilfs_add_link(new_dentry, old_inode); 413 err = nilfs_add_link(new_dentry, old_inode);
416 if (err) { 414 if (err)
417 drop_nlink(old_inode);
418 nilfs_mark_inode_dirty(old_inode);
419 goto out_dir; 415 goto out_dir;
420 }
421 if (dir_de) { 416 if (dir_de) {
422 inc_nlink(new_dir); 417 inc_nlink(new_dir);
423 nilfs_mark_inode_dirty(new_dir); 418 nilfs_mark_inode_dirty(new_dir);
@@ -431,7 +426,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
431 old_inode->i_ctime = CURRENT_TIME; 426 old_inode->i_ctime = CURRENT_TIME;
432 427
433 nilfs_delete_entry(old_de, old_page); 428 nilfs_delete_entry(old_de, old_page);
434 drop_nlink(old_inode);
435 429
436 if (dir_de) { 430 if (dir_de) {
437 nilfs_set_link(old_inode, dir_de, dir_page, new_dir); 431 nilfs_set_link(old_inode, dir_de, dir_page, new_dir);
@@ -488,7 +482,7 @@ static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno,
488 if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO) 482 if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO)
489 return ERR_PTR(-ESTALE); 483 return ERR_PTR(-ESTALE);
490 484
491 root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); 485 root = nilfs_lookup_root(sb->s_fs_info, cno);
492 if (!root) 486 if (!root)
493 return ERR_PTR(-ESTALE); 487 return ERR_PTR(-ESTALE);
494 488
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 777e8fd04304..856e8e4e0b74 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -30,7 +30,6 @@
30#include <linux/blkdev.h> 30#include <linux/blkdev.h>
31#include <linux/nilfs2_fs.h> 31#include <linux/nilfs2_fs.h>
32#include "the_nilfs.h" 32#include "the_nilfs.h"
33#include "sb.h"
34#include "bmap.h" 33#include "bmap.h"
35 34
36/* 35/*
@@ -122,7 +121,7 @@ enum {
122#define NILFS_SYS_INO_BITS \ 121#define NILFS_SYS_INO_BITS \
123 ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS) 122 ((unsigned int)(1 << NILFS_ROOT_INO) | NILFS_MDT_INO_BITS)
124 123
125#define NILFS_FIRST_INO(sb) (NILFS_SB(sb)->s_nilfs->ns_first_ino) 124#define NILFS_FIRST_INO(sb) (((struct the_nilfs *)sb->s_fs_info)->ns_first_ino)
126 125
127#define NILFS_MDT_INODE(sb, ino) \ 126#define NILFS_MDT_INODE(sb, ino) \
128 ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino)))) 127 ((ino) < NILFS_FIRST_INO(sb) && (NILFS_MDT_INO_BITS & (1 << (ino))))
@@ -212,6 +211,23 @@ static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
212 211
213#define NILFS_ATIME_DISABLE 212#define NILFS_ATIME_DISABLE
214 213
214/* Flags that should be inherited by new inodes from their parent. */
215#define NILFS_FL_INHERITED \
216 (FS_SECRM_FL | FS_UNRM_FL | FS_COMPR_FL | FS_SYNC_FL | \
217 FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL |\
218 FS_COMPRBLK_FL | FS_NOCOMP_FL | FS_NOTAIL_FL | FS_DIRSYNC_FL)
219
220/* Mask out flags that are inappropriate for the given type of inode. */
221static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags)
222{
223 if (S_ISDIR(mode))
224 return flags;
225 else if (S_ISREG(mode))
226 return flags & ~(FS_DIRSYNC_FL | FS_TOPDIR_FL);
227 else
228 return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
229}
230
215/* dir.c */ 231/* dir.c */
216extern int nilfs_add_link(struct dentry *, struct inode *); 232extern int nilfs_add_link(struct dentry *, struct inode *);
217extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); 233extern ino_t nilfs_inode_by_name(struct inode *, const struct qstr *);
@@ -229,10 +245,13 @@ extern int nilfs_sync_file(struct file *, int);
229 245
230/* ioctl.c */ 246/* ioctl.c */
231long nilfs_ioctl(struct file *, unsigned int, unsigned long); 247long nilfs_ioctl(struct file *, unsigned int, unsigned long);
248long nilfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
232int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *, 249int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *, struct nilfs_argv *,
233 void **); 250 void **);
234 251
235/* inode.c */ 252/* inode.c */
253void nilfs_inode_add_blocks(struct inode *inode, int n);
254void nilfs_inode_sub_blocks(struct inode *inode, int n);
236extern struct inode *nilfs_new_inode(struct inode *, int); 255extern struct inode *nilfs_new_inode(struct inode *, int);
237extern void nilfs_free_inode(struct inode *); 256extern void nilfs_free_inode(struct inode *);
238extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); 257extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
@@ -275,11 +294,11 @@ extern int nilfs_check_feature_compatibility(struct super_block *,
275 struct nilfs_super_block *); 294 struct nilfs_super_block *);
276extern void nilfs_set_log_cursor(struct nilfs_super_block *, 295extern void nilfs_set_log_cursor(struct nilfs_super_block *,
277 struct the_nilfs *); 296 struct the_nilfs *);
278extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *, 297struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
279 int flip); 298 int flip);
280extern int nilfs_commit_super(struct nilfs_sb_info *, int); 299int nilfs_commit_super(struct super_block *sb, int flag);
281extern int nilfs_cleanup_super(struct nilfs_sb_info *); 300int nilfs_cleanup_super(struct super_block *sb);
282int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, 301int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
283 struct nilfs_root **root); 302 struct nilfs_root **root);
284int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); 303int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno);
285 304
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 3dfcd3b7d389..ba4a64518f38 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -425,7 +425,7 @@ void nilfs_dispose_segment_list(struct list_head *head)
425} 425}
426 426
427static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, 427static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
428 struct nilfs_sb_info *sbi, 428 struct super_block *sb,
429 struct nilfs_recovery_info *ri) 429 struct nilfs_recovery_info *ri)
430{ 430{
431 struct list_head *head = &ri->ri_used_segments; 431 struct list_head *head = &ri->ri_used_segments;
@@ -501,7 +501,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
501} 501}
502 502
503static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, 503static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
504 struct nilfs_sb_info *sbi, 504 struct super_block *sb,
505 struct nilfs_root *root, 505 struct nilfs_root *root,
506 struct list_head *head, 506 struct list_head *head,
507 unsigned long *nr_salvaged_blocks) 507 unsigned long *nr_salvaged_blocks)
@@ -514,7 +514,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
514 int err = 0, err2 = 0; 514 int err = 0, err2 = 0;
515 515
516 list_for_each_entry_safe(rb, n, head, list) { 516 list_for_each_entry_safe(rb, n, head, list) {
517 inode = nilfs_iget(sbi->s_super, root, rb->ino); 517 inode = nilfs_iget(sb, root, rb->ino);
518 if (IS_ERR(inode)) { 518 if (IS_ERR(inode)) {
519 err = PTR_ERR(inode); 519 err = PTR_ERR(inode);
520 inode = NULL; 520 inode = NULL;
@@ -572,11 +572,11 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
572 * nilfs_do_roll_forward - salvage logical segments newer than the latest 572 * nilfs_do_roll_forward - salvage logical segments newer than the latest
573 * checkpoint 573 * checkpoint
574 * @nilfs: nilfs object 574 * @nilfs: nilfs object
575 * @sbi: nilfs_sb_info 575 * @sb: super block instance
576 * @ri: pointer to a nilfs_recovery_info 576 * @ri: pointer to a nilfs_recovery_info
577 */ 577 */
578static int nilfs_do_roll_forward(struct the_nilfs *nilfs, 578static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
579 struct nilfs_sb_info *sbi, 579 struct super_block *sb,
580 struct nilfs_root *root, 580 struct nilfs_root *root,
581 struct nilfs_recovery_info *ri) 581 struct nilfs_recovery_info *ri)
582{ 582{
@@ -648,7 +648,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
648 goto failed; 648 goto failed;
649 if (flags & NILFS_SS_LOGEND) { 649 if (flags & NILFS_SS_LOGEND) {
650 err = nilfs_recover_dsync_blocks( 650 err = nilfs_recover_dsync_blocks(
651 nilfs, sbi, root, &dsync_blocks, 651 nilfs, sb, root, &dsync_blocks,
652 &nsalvaged_blocks); 652 &nsalvaged_blocks);
653 if (unlikely(err)) 653 if (unlikely(err))
654 goto failed; 654 goto failed;
@@ -681,7 +681,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
681 681
682 if (nsalvaged_blocks) { 682 if (nsalvaged_blocks) {
683 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n", 683 printk(KERN_INFO "NILFS (device %s): salvaged %lu blocks\n",
684 sbi->s_super->s_id, nsalvaged_blocks); 684 sb->s_id, nsalvaged_blocks);
685 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; 685 ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE;
686 } 686 }
687 out: 687 out:
@@ -695,7 +695,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs,
695 printk(KERN_ERR 695 printk(KERN_ERR
696 "NILFS (device %s): Error roll-forwarding " 696 "NILFS (device %s): Error roll-forwarding "
697 "(err=%d, pseg block=%llu). ", 697 "(err=%d, pseg block=%llu). ",
698 sbi->s_super->s_id, err, (unsigned long long)pseg_start); 698 sb->s_id, err, (unsigned long long)pseg_start);
699 goto out; 699 goto out;
700} 700}
701 701
@@ -724,7 +724,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
724/** 724/**
725 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint 725 * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint
726 * @nilfs: nilfs object 726 * @nilfs: nilfs object
727 * @sbi: nilfs_sb_info 727 * @sb: super block instance
728 * @ri: pointer to a nilfs_recovery_info struct to store search results. 728 * @ri: pointer to a nilfs_recovery_info struct to store search results.
729 * 729 *
730 * Return Value: On success, 0 is returned. On error, one of the following 730 * Return Value: On success, 0 is returned. On error, one of the following
@@ -741,7 +741,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs,
741 * %-ENOMEM - Insufficient memory available. 741 * %-ENOMEM - Insufficient memory available.
742 */ 742 */
743int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, 743int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
744 struct nilfs_sb_info *sbi, 744 struct super_block *sb,
745 struct nilfs_recovery_info *ri) 745 struct nilfs_recovery_info *ri)
746{ 746{
747 struct nilfs_root *root; 747 struct nilfs_root *root;
@@ -750,32 +750,32 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs,
750 if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) 750 if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0)
751 return 0; 751 return 0;
752 752
753 err = nilfs_attach_checkpoint(sbi, ri->ri_cno, true, &root); 753 err = nilfs_attach_checkpoint(sb, ri->ri_cno, true, &root);
754 if (unlikely(err)) { 754 if (unlikely(err)) {
755 printk(KERN_ERR 755 printk(KERN_ERR
756 "NILFS: error loading the latest checkpoint.\n"); 756 "NILFS: error loading the latest checkpoint.\n");
757 return err; 757 return err;
758 } 758 }
759 759
760 err = nilfs_do_roll_forward(nilfs, sbi, root, ri); 760 err = nilfs_do_roll_forward(nilfs, sb, root, ri);
761 if (unlikely(err)) 761 if (unlikely(err))
762 goto failed; 762 goto failed;
763 763
764 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) { 764 if (ri->ri_need_recovery == NILFS_RECOVERY_ROLLFORWARD_DONE) {
765 err = nilfs_prepare_segment_for_recovery(nilfs, sbi, ri); 765 err = nilfs_prepare_segment_for_recovery(nilfs, sb, ri);
766 if (unlikely(err)) { 766 if (unlikely(err)) {
767 printk(KERN_ERR "NILFS: Error preparing segments for " 767 printk(KERN_ERR "NILFS: Error preparing segments for "
768 "recovery.\n"); 768 "recovery.\n");
769 goto failed; 769 goto failed;
770 } 770 }
771 771
772 err = nilfs_attach_segment_constructor(sbi, root); 772 err = nilfs_attach_log_writer(sb, root);
773 if (unlikely(err)) 773 if (unlikely(err))
774 goto failed; 774 goto failed;
775 775
776 set_nilfs_discontinued(nilfs); 776 set_nilfs_discontinued(nilfs);
777 err = nilfs_construct_segment(sbi->s_super); 777 err = nilfs_construct_segment(sb);
778 nilfs_detach_segment_constructor(sbi); 778 nilfs_detach_log_writer(sb);
779 779
780 if (unlikely(err)) { 780 if (unlikely(err)) {
781 printk(KERN_ERR "NILFS: Oops! recovery failed. " 781 printk(KERN_ERR "NILFS: Oops! recovery failed. "
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
deleted file mode 100644
index 7a17715f215f..000000000000
--- a/fs/nilfs2/sb.h
+++ /dev/null
@@ -1,85 +0,0 @@
1/*
2 * sb.h - NILFS on-memory super block structure.
3 *
4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Written by Ryusuke Konishi <ryusuke@osrg.net>
21 *
22 */
23
24#ifndef _NILFS_SB
25#define _NILFS_SB
26
27#include <linux/types.h>
28#include <linux/fs.h>
29
30struct the_nilfs;
31struct nilfs_sc_info;
32
33/*
34 * NILFS super-block data in memory
35 */
36struct nilfs_sb_info {
37 /* Mount options */
38 unsigned long s_mount_opt;
39 uid_t s_resuid;
40 gid_t s_resgid;
41
42 unsigned long s_interval; /* construction interval */
43 unsigned long s_watermark; /* threshold of data amount
44 for the segment construction */
45
46 /* Fundamental members */
47 struct super_block *s_super; /* reverse pointer to super_block */
48 struct the_nilfs *s_nilfs;
49
50 /* Segment constructor */
51 struct list_head s_dirty_files; /* dirty files list */
52 struct nilfs_sc_info *s_sc_info; /* segment constructor info */
53 spinlock_t s_inode_lock; /* Lock for the nilfs inode.
54 It covers s_dirty_files list */
55
56 /* Inode allocator */
57 spinlock_t s_next_gen_lock;
58 u32 s_next_generation;
59};
60
61static inline struct nilfs_sb_info *NILFS_SB(struct super_block *sb)
62{
63 return sb->s_fs_info;
64}
65
66static inline struct nilfs_sc_info *NILFS_SC(struct nilfs_sb_info *sbi)
67{
68 return sbi->s_sc_info;
69}
70
71/*
72 * Bit operations for the mount option
73 */
74#define nilfs_clear_opt(sbi, opt) \
75 do { (sbi)->s_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
76#define nilfs_set_opt(sbi, opt) \
77 do { (sbi)->s_mount_opt |= NILFS_MOUNT_##opt; } while (0)
78#define nilfs_test_opt(sbi, opt) ((sbi)->s_mount_opt & NILFS_MOUNT_##opt)
79#define nilfs_write_opt(sbi, mask, opt) \
80 do { (sbi)->s_mount_opt = \
81 (((sbi)->s_mount_opt & ~NILFS_MOUNT_##mask) | \
82 NILFS_MOUNT_##opt); \
83 } while (0)
84
85#endif /* _NILFS_SB */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 55ebae5c7f39..afe4f2183454 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -104,8 +104,7 @@ struct nilfs_sc_operations {
104static void nilfs_segctor_start_timer(struct nilfs_sc_info *); 104static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
105static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int); 105static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
106static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *); 106static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
107static void nilfs_dispose_list(struct nilfs_sb_info *, struct list_head *, 107static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);
108 int);
109 108
110#define nilfs_cnt32_gt(a, b) \ 109#define nilfs_cnt32_gt(a, b) \
111 (typecheck(__u32, a) && typecheck(__u32, b) && \ 110 (typecheck(__u32, a) && typecheck(__u32, b) && \
@@ -182,7 +181,6 @@ int nilfs_transaction_begin(struct super_block *sb,
182 struct nilfs_transaction_info *ti, 181 struct nilfs_transaction_info *ti,
183 int vacancy_check) 182 int vacancy_check)
184{ 183{
185 struct nilfs_sb_info *sbi;
186 struct the_nilfs *nilfs; 184 struct the_nilfs *nilfs;
187 int ret = nilfs_prepare_segment_lock(ti); 185 int ret = nilfs_prepare_segment_lock(ti);
188 186
@@ -193,8 +191,7 @@ int nilfs_transaction_begin(struct super_block *sb,
193 191
194 vfs_check_frozen(sb, SB_FREEZE_WRITE); 192 vfs_check_frozen(sb, SB_FREEZE_WRITE);
195 193
196 sbi = NILFS_SB(sb); 194 nilfs = sb->s_fs_info;
197 nilfs = sbi->s_nilfs;
198 down_read(&nilfs->ns_segctor_sem); 195 down_read(&nilfs->ns_segctor_sem);
199 if (vacancy_check && nilfs_near_disk_full(nilfs)) { 196 if (vacancy_check && nilfs_near_disk_full(nilfs)) {
200 up_read(&nilfs->ns_segctor_sem); 197 up_read(&nilfs->ns_segctor_sem);
@@ -225,8 +222,7 @@ int nilfs_transaction_begin(struct super_block *sb,
225int nilfs_transaction_commit(struct super_block *sb) 222int nilfs_transaction_commit(struct super_block *sb)
226{ 223{
227 struct nilfs_transaction_info *ti = current->journal_info; 224 struct nilfs_transaction_info *ti = current->journal_info;
228 struct nilfs_sb_info *sbi; 225 struct the_nilfs *nilfs = sb->s_fs_info;
229 struct nilfs_sc_info *sci;
230 int err = 0; 226 int err = 0;
231 227
232 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 228 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
@@ -235,16 +231,15 @@ int nilfs_transaction_commit(struct super_block *sb)
235 ti->ti_count--; 231 ti->ti_count--;
236 return 0; 232 return 0;
237 } 233 }
238 sbi = NILFS_SB(sb); 234 if (nilfs->ns_writer) {
239 sci = NILFS_SC(sbi); 235 struct nilfs_sc_info *sci = nilfs->ns_writer;
240 if (sci != NULL) { 236
241 if (ti->ti_flags & NILFS_TI_COMMIT) 237 if (ti->ti_flags & NILFS_TI_COMMIT)
242 nilfs_segctor_start_timer(sci); 238 nilfs_segctor_start_timer(sci);
243 if (atomic_read(&sbi->s_nilfs->ns_ndirtyblks) > 239 if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
244 sci->sc_watermark)
245 nilfs_segctor_do_flush(sci, 0); 240 nilfs_segctor_do_flush(sci, 0);
246 } 241 }
247 up_read(&sbi->s_nilfs->ns_segctor_sem); 242 up_read(&nilfs->ns_segctor_sem);
248 current->journal_info = ti->ti_save; 243 current->journal_info = ti->ti_save;
249 244
250 if (ti->ti_flags & NILFS_TI_SYNC) 245 if (ti->ti_flags & NILFS_TI_SYNC)
@@ -257,13 +252,14 @@ int nilfs_transaction_commit(struct super_block *sb)
257void nilfs_transaction_abort(struct super_block *sb) 252void nilfs_transaction_abort(struct super_block *sb)
258{ 253{
259 struct nilfs_transaction_info *ti = current->journal_info; 254 struct nilfs_transaction_info *ti = current->journal_info;
255 struct the_nilfs *nilfs = sb->s_fs_info;
260 256
261 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 257 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
262 if (ti->ti_count > 0) { 258 if (ti->ti_count > 0) {
263 ti->ti_count--; 259 ti->ti_count--;
264 return; 260 return;
265 } 261 }
266 up_read(&NILFS_SB(sb)->s_nilfs->ns_segctor_sem); 262 up_read(&nilfs->ns_segctor_sem);
267 263
268 current->journal_info = ti->ti_save; 264 current->journal_info = ti->ti_save;
269 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) 265 if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
@@ -272,9 +268,8 @@ void nilfs_transaction_abort(struct super_block *sb)
272 268
273void nilfs_relax_pressure_in_lock(struct super_block *sb) 269void nilfs_relax_pressure_in_lock(struct super_block *sb)
274{ 270{
275 struct nilfs_sb_info *sbi = NILFS_SB(sb); 271 struct the_nilfs *nilfs = sb->s_fs_info;
276 struct nilfs_sc_info *sci = NILFS_SC(sbi); 272 struct nilfs_sc_info *sci = nilfs->ns_writer;
277 struct the_nilfs *nilfs = sbi->s_nilfs;
278 273
279 if (!sci || !sci->sc_flush_request) 274 if (!sci || !sci->sc_flush_request)
280 return; 275 return;
@@ -294,11 +289,13 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb)
294 downgrade_write(&nilfs->ns_segctor_sem); 289 downgrade_write(&nilfs->ns_segctor_sem);
295} 290}
296 291
297static void nilfs_transaction_lock(struct nilfs_sb_info *sbi, 292static void nilfs_transaction_lock(struct super_block *sb,
298 struct nilfs_transaction_info *ti, 293 struct nilfs_transaction_info *ti,
299 int gcflag) 294 int gcflag)
300{ 295{
301 struct nilfs_transaction_info *cur_ti = current->journal_info; 296 struct nilfs_transaction_info *cur_ti = current->journal_info;
297 struct the_nilfs *nilfs = sb->s_fs_info;
298 struct nilfs_sc_info *sci = nilfs->ns_writer;
302 299
303 WARN_ON(cur_ti); 300 WARN_ON(cur_ti);
304 ti->ti_flags = NILFS_TI_WRITER; 301 ti->ti_flags = NILFS_TI_WRITER;
@@ -309,30 +306,31 @@ static void nilfs_transaction_lock(struct nilfs_sb_info *sbi,
309 current->journal_info = ti; 306 current->journal_info = ti;
310 307
311 for (;;) { 308 for (;;) {
312 down_write(&sbi->s_nilfs->ns_segctor_sem); 309 down_write(&nilfs->ns_segctor_sem);
313 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &NILFS_SC(sbi)->sc_flags)) 310 if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
314 break; 311 break;
315 312
316 nilfs_segctor_do_immediate_flush(NILFS_SC(sbi)); 313 nilfs_segctor_do_immediate_flush(sci);
317 314
318 up_write(&sbi->s_nilfs->ns_segctor_sem); 315 up_write(&nilfs->ns_segctor_sem);
319 yield(); 316 yield();
320 } 317 }
321 if (gcflag) 318 if (gcflag)
322 ti->ti_flags |= NILFS_TI_GC; 319 ti->ti_flags |= NILFS_TI_GC;
323} 320}
324 321
325static void nilfs_transaction_unlock(struct nilfs_sb_info *sbi) 322static void nilfs_transaction_unlock(struct super_block *sb)
326{ 323{
327 struct nilfs_transaction_info *ti = current->journal_info; 324 struct nilfs_transaction_info *ti = current->journal_info;
325 struct the_nilfs *nilfs = sb->s_fs_info;
328 326
329 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); 327 BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
330 BUG_ON(ti->ti_count > 0); 328 BUG_ON(ti->ti_count > 0);
331 329
332 up_write(&sbi->s_nilfs->ns_segctor_sem); 330 up_write(&nilfs->ns_segctor_sem);
333 current->journal_info = ti->ti_save; 331 current->journal_info = ti->ti_save;
334 if (!list_empty(&ti->ti_garbage)) 332 if (!list_empty(&ti->ti_garbage))
335 nilfs_dispose_list(sbi, &ti->ti_garbage, 0); 333 nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
336} 334}
337 335
338static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, 336static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
@@ -430,7 +428,8 @@ static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
430 nilfs_segctor_map_segsum_entry( 428 nilfs_segctor_map_segsum_entry(
431 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo)); 429 sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));
432 430
433 if (inode->i_sb && !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) 431 if (NILFS_I(inode)->i_root &&
432 !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
434 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); 433 set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
435 /* skip finfo */ 434 /* skip finfo */
436} 435}
@@ -713,7 +712,7 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
713 } 712 }
714} 713}
715 714
716static void nilfs_dispose_list(struct nilfs_sb_info *sbi, 715static void nilfs_dispose_list(struct the_nilfs *nilfs,
717 struct list_head *head, int force) 716 struct list_head *head, int force)
718{ 717{
719 struct nilfs_inode_info *ii, *n; 718 struct nilfs_inode_info *ii, *n;
@@ -721,7 +720,7 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
721 unsigned nv = 0; 720 unsigned nv = 0;
722 721
723 while (!list_empty(head)) { 722 while (!list_empty(head)) {
724 spin_lock(&sbi->s_inode_lock); 723 spin_lock(&nilfs->ns_inode_lock);
725 list_for_each_entry_safe(ii, n, head, i_dirty) { 724 list_for_each_entry_safe(ii, n, head, i_dirty) {
726 list_del_init(&ii->i_dirty); 725 list_del_init(&ii->i_dirty);
727 if (force) { 726 if (force) {
@@ -732,14 +731,14 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi,
732 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) { 731 } else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
733 set_bit(NILFS_I_QUEUED, &ii->i_state); 732 set_bit(NILFS_I_QUEUED, &ii->i_state);
734 list_add_tail(&ii->i_dirty, 733 list_add_tail(&ii->i_dirty,
735 &sbi->s_dirty_files); 734 &nilfs->ns_dirty_files);
736 continue; 735 continue;
737 } 736 }
738 ivec[nv++] = ii; 737 ivec[nv++] = ii;
739 if (nv == SC_N_INODEVEC) 738 if (nv == SC_N_INODEVEC)
740 break; 739 break;
741 } 740 }
742 spin_unlock(&sbi->s_inode_lock); 741 spin_unlock(&nilfs->ns_inode_lock);
743 742
744 for (pii = ivec; nv > 0; pii++, nv--) 743 for (pii = ivec; nv > 0; pii++, nv--)
745 iput(&(*pii)->vfs_inode); 744 iput(&(*pii)->vfs_inode);
@@ -772,24 +771,23 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
772 771
773static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) 772static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
774{ 773{
775 struct nilfs_sb_info *sbi = sci->sc_sbi; 774 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
776 int ret = 0; 775 int ret = 0;
777 776
778 if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root)) 777 if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
779 set_bit(NILFS_SC_DIRTY, &sci->sc_flags); 778 set_bit(NILFS_SC_DIRTY, &sci->sc_flags);
780 779
781 spin_lock(&sbi->s_inode_lock); 780 spin_lock(&nilfs->ns_inode_lock);
782 if (list_empty(&sbi->s_dirty_files) && nilfs_segctor_clean(sci)) 781 if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
783 ret++; 782 ret++;
784 783
785 spin_unlock(&sbi->s_inode_lock); 784 spin_unlock(&nilfs->ns_inode_lock);
786 return ret; 785 return ret;
787} 786}
788 787
789static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) 788static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
790{ 789{
791 struct nilfs_sb_info *sbi = sci->sc_sbi; 790 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
792 struct the_nilfs *nilfs = sbi->s_nilfs;
793 791
794 nilfs_mdt_clear_dirty(sci->sc_root->ifile); 792 nilfs_mdt_clear_dirty(sci->sc_root->ifile);
795 nilfs_mdt_clear_dirty(nilfs->ns_cpfile); 793 nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
@@ -799,7 +797,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
799 797
800static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) 798static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
801{ 799{
802 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 800 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
803 struct buffer_head *bh_cp; 801 struct buffer_head *bh_cp;
804 struct nilfs_checkpoint *raw_cp; 802 struct nilfs_checkpoint *raw_cp;
805 int err; 803 int err;
@@ -823,8 +821,7 @@ static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
823 821
824static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) 822static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
825{ 823{
826 struct nilfs_sb_info *sbi = sci->sc_sbi; 824 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
827 struct the_nilfs *nilfs = sbi->s_nilfs;
828 struct buffer_head *bh_cp; 825 struct buffer_head *bh_cp;
829 struct nilfs_checkpoint *raw_cp; 826 struct nilfs_checkpoint *raw_cp;
830 int err; 827 int err;
@@ -1048,8 +1045,7 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
1048 1045
1049static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) 1046static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
1050{ 1047{
1051 struct nilfs_sb_info *sbi = sci->sc_sbi; 1048 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1052 struct the_nilfs *nilfs = sbi->s_nilfs;
1053 struct list_head *head; 1049 struct list_head *head;
1054 struct nilfs_inode_info *ii; 1050 struct nilfs_inode_info *ii;
1055 size_t ndone; 1051 size_t ndone;
@@ -1858,7 +1854,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
1858{ 1854{
1859 struct nilfs_segment_buffer *segbuf; 1855 struct nilfs_segment_buffer *segbuf;
1860 struct page *bd_page = NULL, *fs_page = NULL; 1856 struct page *bd_page = NULL, *fs_page = NULL;
1861 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 1857 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
1862 int update_sr = false; 1858 int update_sr = false;
1863 1859
1864 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { 1860 list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
@@ -1962,30 +1958,30 @@ static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
1962 return ret; 1958 return ret;
1963} 1959}
1964 1960
1965static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, 1961static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
1966 struct nilfs_sb_info *sbi) 1962 struct the_nilfs *nilfs)
1967{ 1963{
1968 struct nilfs_inode_info *ii, *n; 1964 struct nilfs_inode_info *ii, *n;
1969 struct inode *ifile = sci->sc_root->ifile; 1965 struct inode *ifile = sci->sc_root->ifile;
1970 1966
1971 spin_lock(&sbi->s_inode_lock); 1967 spin_lock(&nilfs->ns_inode_lock);
1972 retry: 1968 retry:
1973 list_for_each_entry_safe(ii, n, &sbi->s_dirty_files, i_dirty) { 1969 list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
1974 if (!ii->i_bh) { 1970 if (!ii->i_bh) {
1975 struct buffer_head *ibh; 1971 struct buffer_head *ibh;
1976 int err; 1972 int err;
1977 1973
1978 spin_unlock(&sbi->s_inode_lock); 1974 spin_unlock(&nilfs->ns_inode_lock);
1979 err = nilfs_ifile_get_inode_block( 1975 err = nilfs_ifile_get_inode_block(
1980 ifile, ii->vfs_inode.i_ino, &ibh); 1976 ifile, ii->vfs_inode.i_ino, &ibh);
1981 if (unlikely(err)) { 1977 if (unlikely(err)) {
1982 nilfs_warning(sbi->s_super, __func__, 1978 nilfs_warning(sci->sc_super, __func__,
1983 "failed to get inode block.\n"); 1979 "failed to get inode block.\n");
1984 return err; 1980 return err;
1985 } 1981 }
1986 nilfs_mdt_mark_buffer_dirty(ibh); 1982 nilfs_mdt_mark_buffer_dirty(ibh);
1987 nilfs_mdt_mark_dirty(ifile); 1983 nilfs_mdt_mark_dirty(ifile);
1988 spin_lock(&sbi->s_inode_lock); 1984 spin_lock(&nilfs->ns_inode_lock);
1989 if (likely(!ii->i_bh)) 1985 if (likely(!ii->i_bh))
1990 ii->i_bh = ibh; 1986 ii->i_bh = ibh;
1991 else 1987 else
@@ -1998,18 +1994,18 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci,
1998 list_del(&ii->i_dirty); 1994 list_del(&ii->i_dirty);
1999 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files); 1995 list_add_tail(&ii->i_dirty, &sci->sc_dirty_files);
2000 } 1996 }
2001 spin_unlock(&sbi->s_inode_lock); 1997 spin_unlock(&nilfs->ns_inode_lock);
2002 1998
2003 return 0; 1999 return 0;
2004} 2000}
2005 2001
2006static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, 2002static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
2007 struct nilfs_sb_info *sbi) 2003 struct the_nilfs *nilfs)
2008{ 2004{
2009 struct nilfs_transaction_info *ti = current->journal_info; 2005 struct nilfs_transaction_info *ti = current->journal_info;
2010 struct nilfs_inode_info *ii, *n; 2006 struct nilfs_inode_info *ii, *n;
2011 2007
2012 spin_lock(&sbi->s_inode_lock); 2008 spin_lock(&nilfs->ns_inode_lock);
2013 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { 2009 list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
2014 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || 2010 if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
2015 test_bit(NILFS_I_DIRTY, &ii->i_state)) 2011 test_bit(NILFS_I_DIRTY, &ii->i_state))
@@ -2021,7 +2017,7 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
2021 list_del(&ii->i_dirty); 2017 list_del(&ii->i_dirty);
2022 list_add_tail(&ii->i_dirty, &ti->ti_garbage); 2018 list_add_tail(&ii->i_dirty, &ti->ti_garbage);
2023 } 2019 }
2024 spin_unlock(&sbi->s_inode_lock); 2020 spin_unlock(&nilfs->ns_inode_lock);
2025} 2021}
2026 2022
2027/* 2023/*
@@ -2029,15 +2025,14 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci,
2029 */ 2025 */
2030static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) 2026static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2031{ 2027{
2032 struct nilfs_sb_info *sbi = sci->sc_sbi; 2028 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2033 struct the_nilfs *nilfs = sbi->s_nilfs;
2034 struct page *failed_page; 2029 struct page *failed_page;
2035 int err; 2030 int err;
2036 2031
2037 sci->sc_stage.scnt = NILFS_ST_INIT; 2032 sci->sc_stage.scnt = NILFS_ST_INIT;
2038 sci->sc_cno = nilfs->ns_cno; 2033 sci->sc_cno = nilfs->ns_cno;
2039 2034
2040 err = nilfs_segctor_check_in_files(sci, sbi); 2035 err = nilfs_segctor_collect_dirty_files(sci, nilfs);
2041 if (unlikely(err)) 2036 if (unlikely(err))
2042 goto out; 2037 goto out;
2043 2038
@@ -2115,7 +2110,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
2115 } while (sci->sc_stage.scnt != NILFS_ST_DONE); 2110 } while (sci->sc_stage.scnt != NILFS_ST_DONE);
2116 2111
2117 out: 2112 out:
2118 nilfs_segctor_check_out_files(sci, sbi); 2113 nilfs_segctor_drop_written_files(sci, nilfs);
2119 return err; 2114 return err;
2120 2115
2121 failed_to_write: 2116 failed_to_write:
@@ -2168,8 +2163,8 @@ static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
2168 */ 2163 */
2169void nilfs_flush_segment(struct super_block *sb, ino_t ino) 2164void nilfs_flush_segment(struct super_block *sb, ino_t ino)
2170{ 2165{
2171 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2166 struct the_nilfs *nilfs = sb->s_fs_info;
2172 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2167 struct nilfs_sc_info *sci = nilfs->ns_writer;
2173 2168
2174 if (!sci || nilfs_doing_construction()) 2169 if (!sci || nilfs_doing_construction())
2175 return; 2170 return;
@@ -2258,8 +2253,8 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
2258 */ 2253 */
2259int nilfs_construct_segment(struct super_block *sb) 2254int nilfs_construct_segment(struct super_block *sb)
2260{ 2255{
2261 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2256 struct the_nilfs *nilfs = sb->s_fs_info;
2262 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2257 struct nilfs_sc_info *sci = nilfs->ns_writer;
2263 struct nilfs_transaction_info *ti; 2258 struct nilfs_transaction_info *ti;
2264 int err; 2259 int err;
2265 2260
@@ -2296,8 +2291,8 @@ int nilfs_construct_segment(struct super_block *sb)
2296int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, 2291int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2297 loff_t start, loff_t end) 2292 loff_t start, loff_t end)
2298{ 2293{
2299 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2294 struct the_nilfs *nilfs = sb->s_fs_info;
2300 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2295 struct nilfs_sc_info *sci = nilfs->ns_writer;
2301 struct nilfs_inode_info *ii; 2296 struct nilfs_inode_info *ii;
2302 struct nilfs_transaction_info ti; 2297 struct nilfs_transaction_info ti;
2303 int err = 0; 2298 int err = 0;
@@ -2305,33 +2300,33 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
2305 if (!sci) 2300 if (!sci)
2306 return -EROFS; 2301 return -EROFS;
2307 2302
2308 nilfs_transaction_lock(sbi, &ti, 0); 2303 nilfs_transaction_lock(sb, &ti, 0);
2309 2304
2310 ii = NILFS_I(inode); 2305 ii = NILFS_I(inode);
2311 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) || 2306 if (test_bit(NILFS_I_INODE_DIRTY, &ii->i_state) ||
2312 nilfs_test_opt(sbi, STRICT_ORDER) || 2307 nilfs_test_opt(nilfs, STRICT_ORDER) ||
2313 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) || 2308 test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
2314 nilfs_discontinued(sbi->s_nilfs)) { 2309 nilfs_discontinued(nilfs)) {
2315 nilfs_transaction_unlock(sbi); 2310 nilfs_transaction_unlock(sb);
2316 err = nilfs_segctor_sync(sci); 2311 err = nilfs_segctor_sync(sci);
2317 return err; 2312 return err;
2318 } 2313 }
2319 2314
2320 spin_lock(&sbi->s_inode_lock); 2315 spin_lock(&nilfs->ns_inode_lock);
2321 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) && 2316 if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
2322 !test_bit(NILFS_I_BUSY, &ii->i_state)) { 2317 !test_bit(NILFS_I_BUSY, &ii->i_state)) {
2323 spin_unlock(&sbi->s_inode_lock); 2318 spin_unlock(&nilfs->ns_inode_lock);
2324 nilfs_transaction_unlock(sbi); 2319 nilfs_transaction_unlock(sb);
2325 return 0; 2320 return 0;
2326 } 2321 }
2327 spin_unlock(&sbi->s_inode_lock); 2322 spin_unlock(&nilfs->ns_inode_lock);
2328 sci->sc_dsync_inode = ii; 2323 sci->sc_dsync_inode = ii;
2329 sci->sc_dsync_start = start; 2324 sci->sc_dsync_start = start;
2330 sci->sc_dsync_end = end; 2325 sci->sc_dsync_end = end;
2331 2326
2332 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC); 2327 err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
2333 2328
2334 nilfs_transaction_unlock(sbi); 2329 nilfs_transaction_unlock(sb);
2335 return err; 2330 return err;
2336} 2331}
2337 2332
@@ -2387,8 +2382,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
2387 */ 2382 */
2388static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) 2383static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2389{ 2384{
2390 struct nilfs_sb_info *sbi = sci->sc_sbi; 2385 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2391 struct the_nilfs *nilfs = sbi->s_nilfs;
2392 struct nilfs_super_block **sbp; 2386 struct nilfs_super_block **sbp;
2393 int err = 0; 2387 int err = 0;
2394 2388
@@ -2406,11 +2400,12 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
2406 nilfs_discontinued(nilfs)) { 2400 nilfs_discontinued(nilfs)) {
2407 down_write(&nilfs->ns_sem); 2401 down_write(&nilfs->ns_sem);
2408 err = -EIO; 2402 err = -EIO;
2409 sbp = nilfs_prepare_super(sbi, 2403 sbp = nilfs_prepare_super(sci->sc_super,
2410 nilfs_sb_will_flip(nilfs)); 2404 nilfs_sb_will_flip(nilfs));
2411 if (likely(sbp)) { 2405 if (likely(sbp)) {
2412 nilfs_set_log_cursor(sbp[0], nilfs); 2406 nilfs_set_log_cursor(sbp[0], nilfs);
2413 err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); 2407 err = nilfs_commit_super(sci->sc_super,
2408 NILFS_SB_COMMIT);
2414 } 2409 }
2415 up_write(&nilfs->ns_sem); 2410 up_write(&nilfs->ns_sem);
2416 } 2411 }
@@ -2442,16 +2437,15 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
2442int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, 2437int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2443 void **kbufs) 2438 void **kbufs)
2444{ 2439{
2445 struct nilfs_sb_info *sbi = NILFS_SB(sb); 2440 struct the_nilfs *nilfs = sb->s_fs_info;
2446 struct nilfs_sc_info *sci = NILFS_SC(sbi); 2441 struct nilfs_sc_info *sci = nilfs->ns_writer;
2447 struct the_nilfs *nilfs = sbi->s_nilfs;
2448 struct nilfs_transaction_info ti; 2442 struct nilfs_transaction_info ti;
2449 int err; 2443 int err;
2450 2444
2451 if (unlikely(!sci)) 2445 if (unlikely(!sci))
2452 return -EROFS; 2446 return -EROFS;
2453 2447
2454 nilfs_transaction_lock(sbi, &ti, 1); 2448 nilfs_transaction_lock(sb, &ti, 1);
2455 2449
2456 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); 2450 err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
2457 if (unlikely(err)) 2451 if (unlikely(err))
@@ -2479,14 +2473,14 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2479 set_current_state(TASK_INTERRUPTIBLE); 2473 set_current_state(TASK_INTERRUPTIBLE);
2480 schedule_timeout(sci->sc_interval); 2474 schedule_timeout(sci->sc_interval);
2481 } 2475 }
2482 if (nilfs_test_opt(sbi, DISCARD)) { 2476 if (nilfs_test_opt(nilfs, DISCARD)) {
2483 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs, 2477 int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
2484 sci->sc_nfreesegs); 2478 sci->sc_nfreesegs);
2485 if (ret) { 2479 if (ret) {
2486 printk(KERN_WARNING 2480 printk(KERN_WARNING
2487 "NILFS warning: error %d on discard request, " 2481 "NILFS warning: error %d on discard request, "
2488 "turning discards off for the device\n", ret); 2482 "turning discards off for the device\n", ret);
2489 nilfs_clear_opt(sbi, DISCARD); 2483 nilfs_clear_opt(nilfs, DISCARD);
2490 } 2484 }
2491 } 2485 }
2492 2486
@@ -2494,16 +2488,15 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
2494 sci->sc_freesegs = NULL; 2488 sci->sc_freesegs = NULL;
2495 sci->sc_nfreesegs = 0; 2489 sci->sc_nfreesegs = 0;
2496 nilfs_mdt_clear_shadow_map(nilfs->ns_dat); 2490 nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
2497 nilfs_transaction_unlock(sbi); 2491 nilfs_transaction_unlock(sb);
2498 return err; 2492 return err;
2499} 2493}
2500 2494
2501static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) 2495static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2502{ 2496{
2503 struct nilfs_sb_info *sbi = sci->sc_sbi;
2504 struct nilfs_transaction_info ti; 2497 struct nilfs_transaction_info ti;
2505 2498
2506 nilfs_transaction_lock(sbi, &ti, 0); 2499 nilfs_transaction_lock(sci->sc_super, &ti, 0);
2507 nilfs_segctor_construct(sci, mode); 2500 nilfs_segctor_construct(sci, mode);
2508 2501
2509 /* 2502 /*
@@ -2514,7 +2507,7 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
2514 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) 2507 if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
2515 nilfs_segctor_start_timer(sci); 2508 nilfs_segctor_start_timer(sci);
2516 2509
2517 nilfs_transaction_unlock(sbi); 2510 nilfs_transaction_unlock(sci->sc_super);
2518} 2511}
2519 2512
2520static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) 2513static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
@@ -2560,7 +2553,7 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
2560static int nilfs_segctor_thread(void *arg) 2553static int nilfs_segctor_thread(void *arg)
2561{ 2554{
2562 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; 2555 struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
2563 struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs; 2556 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2564 int timeout = 0; 2557 int timeout = 0;
2565 2558
2566 sci->sc_timer.data = (unsigned long)current; 2559 sci->sc_timer.data = (unsigned long)current;
@@ -2671,17 +2664,17 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
2671/* 2664/*
2672 * Setup & clean-up functions 2665 * Setup & clean-up functions
2673 */ 2666 */
2674static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, 2667static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
2675 struct nilfs_root *root) 2668 struct nilfs_root *root)
2676{ 2669{
2670 struct the_nilfs *nilfs = sb->s_fs_info;
2677 struct nilfs_sc_info *sci; 2671 struct nilfs_sc_info *sci;
2678 2672
2679 sci = kzalloc(sizeof(*sci), GFP_KERNEL); 2673 sci = kzalloc(sizeof(*sci), GFP_KERNEL);
2680 if (!sci) 2674 if (!sci)
2681 return NULL; 2675 return NULL;
2682 2676
2683 sci->sc_sbi = sbi; 2677 sci->sc_super = sb;
2684 sci->sc_super = sbi->s_super;
2685 2678
2686 nilfs_get_root(root); 2679 nilfs_get_root(root);
2687 sci->sc_root = root; 2680 sci->sc_root = root;
@@ -2701,10 +2694,10 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi,
2701 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; 2694 sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
2702 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK; 2695 sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;
2703 2696
2704 if (sbi->s_interval) 2697 if (nilfs->ns_interval)
2705 sci->sc_interval = sbi->s_interval; 2698 sci->sc_interval = nilfs->ns_interval;
2706 if (sbi->s_watermark) 2699 if (nilfs->ns_watermark)
2707 sci->sc_watermark = sbi->s_watermark; 2700 sci->sc_watermark = nilfs->ns_watermark;
2708 return sci; 2701 return sci;
2709} 2702}
2710 2703
@@ -2715,12 +2708,11 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2715 /* The segctord thread was stopped and its timer was removed. 2708 /* The segctord thread was stopped and its timer was removed.
2716 But some tasks remain. */ 2709 But some tasks remain. */
2717 do { 2710 do {
2718 struct nilfs_sb_info *sbi = sci->sc_sbi;
2719 struct nilfs_transaction_info ti; 2711 struct nilfs_transaction_info ti;
2720 2712
2721 nilfs_transaction_lock(sbi, &ti, 0); 2713 nilfs_transaction_lock(sci->sc_super, &ti, 0);
2722 ret = nilfs_segctor_construct(sci, SC_LSEG_SR); 2714 ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
2723 nilfs_transaction_unlock(sbi); 2715 nilfs_transaction_unlock(sci->sc_super);
2724 2716
2725 } while (ret && retrycount-- > 0); 2717 } while (ret && retrycount-- > 0);
2726} 2718}
@@ -2735,10 +2727,10 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
2735 */ 2727 */
2736static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) 2728static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2737{ 2729{
2738 struct nilfs_sb_info *sbi = sci->sc_sbi; 2730 struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
2739 int flag; 2731 int flag;
2740 2732
2741 up_write(&sbi->s_nilfs->ns_segctor_sem); 2733 up_write(&nilfs->ns_segctor_sem);
2742 2734
2743 spin_lock(&sci->sc_state_lock); 2735 spin_lock(&sci->sc_state_lock);
2744 nilfs_segctor_kill_thread(sci); 2736 nilfs_segctor_kill_thread(sci);
@@ -2752,9 +2744,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2752 WARN_ON(!list_empty(&sci->sc_copied_buffers)); 2744 WARN_ON(!list_empty(&sci->sc_copied_buffers));
2753 2745
2754 if (!list_empty(&sci->sc_dirty_files)) { 2746 if (!list_empty(&sci->sc_dirty_files)) {
2755 nilfs_warning(sbi->s_super, __func__, 2747 nilfs_warning(sci->sc_super, __func__,
2756 "dirty file(s) after the final construction\n"); 2748 "dirty file(s) after the final construction\n");
2757 nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1); 2749 nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
2758 } 2750 }
2759 2751
2760 WARN_ON(!list_empty(&sci->sc_segbufs)); 2752 WARN_ON(!list_empty(&sci->sc_segbufs));
@@ -2762,79 +2754,78 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
2762 2754
2763 nilfs_put_root(sci->sc_root); 2755 nilfs_put_root(sci->sc_root);
2764 2756
2765 down_write(&sbi->s_nilfs->ns_segctor_sem); 2757 down_write(&nilfs->ns_segctor_sem);
2766 2758
2767 del_timer_sync(&sci->sc_timer); 2759 del_timer_sync(&sci->sc_timer);
2768 kfree(sci); 2760 kfree(sci);
2769} 2761}
2770 2762
2771/** 2763/**
2772 * nilfs_attach_segment_constructor - attach a segment constructor 2764 * nilfs_attach_log_writer - attach log writer
2773 * @sbi: nilfs_sb_info 2765 * @sb: super block instance
2774 * @root: root object of the current filesystem tree 2766 * @root: root object of the current filesystem tree
2775 * 2767 *
2776 * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, 2768 * This allocates a log writer object, initializes it, and starts the
2777 * initializes it, and starts the segment constructor. 2769 * log writer.
2778 * 2770 *
2779 * Return Value: On success, 0 is returned. On error, one of the following 2771 * Return Value: On success, 0 is returned. On error, one of the following
2780 * negative error code is returned. 2772 * negative error code is returned.
2781 * 2773 *
2782 * %-ENOMEM - Insufficient memory available. 2774 * %-ENOMEM - Insufficient memory available.
2783 */ 2775 */
2784int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, 2776int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
2785 struct nilfs_root *root)
2786{ 2777{
2778 struct the_nilfs *nilfs = sb->s_fs_info;
2787 int err; 2779 int err;
2788 2780
2789 if (NILFS_SC(sbi)) { 2781 if (nilfs->ns_writer) {
2790 /* 2782 /*
2791 * This happens if the filesystem was remounted 2783 * This happens if the filesystem was remounted
2792 * read/write after nilfs_error degenerated it into a 2784 * read/write after nilfs_error degenerated it into a
2793 * read-only mount. 2785 * read-only mount.
2794 */ 2786 */
2795 nilfs_detach_segment_constructor(sbi); 2787 nilfs_detach_log_writer(sb);
2796 } 2788 }
2797 2789
2798 sbi->s_sc_info = nilfs_segctor_new(sbi, root); 2790 nilfs->ns_writer = nilfs_segctor_new(sb, root);
2799 if (!sbi->s_sc_info) 2791 if (!nilfs->ns_writer)
2800 return -ENOMEM; 2792 return -ENOMEM;
2801 2793
2802 err = nilfs_segctor_start_thread(NILFS_SC(sbi)); 2794 err = nilfs_segctor_start_thread(nilfs->ns_writer);
2803 if (err) { 2795 if (err) {
2804 kfree(sbi->s_sc_info); 2796 kfree(nilfs->ns_writer);
2805 sbi->s_sc_info = NULL; 2797 nilfs->ns_writer = NULL;
2806 } 2798 }
2807 return err; 2799 return err;
2808} 2800}
2809 2801
2810/** 2802/**
2811 * nilfs_detach_segment_constructor - destroy the segment constructor 2803 * nilfs_detach_log_writer - destroy log writer
2812 * @sbi: nilfs_sb_info 2804 * @sb: super block instance
2813 * 2805 *
2814 * nilfs_detach_segment_constructor() kills the segment constructor daemon, 2806 * This kills log writer daemon, frees the log writer object, and
2815 * frees the struct nilfs_sc_info, and destroy the dirty file list. 2807 * destroys list of dirty files.
2816 */ 2808 */
2817void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) 2809void nilfs_detach_log_writer(struct super_block *sb)
2818{ 2810{
2819 struct the_nilfs *nilfs = sbi->s_nilfs; 2811 struct the_nilfs *nilfs = sb->s_fs_info;
2820 LIST_HEAD(garbage_list); 2812 LIST_HEAD(garbage_list);
2821 2813
2822 down_write(&nilfs->ns_segctor_sem); 2814 down_write(&nilfs->ns_segctor_sem);
2823 if (NILFS_SC(sbi)) { 2815 if (nilfs->ns_writer) {
2824 nilfs_segctor_destroy(NILFS_SC(sbi)); 2816 nilfs_segctor_destroy(nilfs->ns_writer);
2825 sbi->s_sc_info = NULL; 2817 nilfs->ns_writer = NULL;
2826 } 2818 }
2827 2819
2828 /* Force to free the list of dirty files */ 2820 /* Force to free the list of dirty files */
2829 spin_lock(&sbi->s_inode_lock); 2821 spin_lock(&nilfs->ns_inode_lock);
2830 if (!list_empty(&sbi->s_dirty_files)) { 2822 if (!list_empty(&nilfs->ns_dirty_files)) {
2831 list_splice_init(&sbi->s_dirty_files, &garbage_list); 2823 list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
2832 nilfs_warning(sbi->s_super, __func__, 2824 nilfs_warning(sb, __func__,
2833 "Non empty dirty list after the last " 2825 "Hit dirty file after stopped log writer\n");
2834 "segment construction\n"); 2826 }
2835 } 2827 spin_unlock(&nilfs->ns_inode_lock);
2836 spin_unlock(&sbi->s_inode_lock);
2837 up_write(&nilfs->ns_segctor_sem); 2828 up_write(&nilfs->ns_segctor_sem);
2838 2829
2839 nilfs_dispose_list(sbi, &garbage_list, 1); 2830 nilfs_dispose_list(nilfs, &garbage_list, 1);
2840} 2831}
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index cd8056e7cbed..6c02a86745fb 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -27,7 +27,7 @@
27#include <linux/fs.h> 27#include <linux/fs.h>
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h> 29#include <linux/nilfs2_fs.h>
30#include "sb.h" 30#include "nilfs.h"
31 31
32struct nilfs_root; 32struct nilfs_root;
33 33
@@ -88,7 +88,6 @@ struct nilfs_segsum_pointer {
88/** 88/**
89 * struct nilfs_sc_info - Segment constructor information 89 * struct nilfs_sc_info - Segment constructor information
90 * @sc_super: Back pointer to super_block struct 90 * @sc_super: Back pointer to super_block struct
91 * @sc_sbi: Back pointer to nilfs_sb_info struct
92 * @sc_root: root object of the current filesystem tree 91 * @sc_root: root object of the current filesystem tree
93 * @sc_nblk_inc: Block count of current generation 92 * @sc_nblk_inc: Block count of current generation
94 * @sc_dirty_files: List of files to be written 93 * @sc_dirty_files: List of files to be written
@@ -131,7 +130,6 @@ struct nilfs_segsum_pointer {
131 */ 130 */
132struct nilfs_sc_info { 131struct nilfs_sc_info {
133 struct super_block *sc_super; 132 struct super_block *sc_super;
134 struct nilfs_sb_info *sc_sbi;
135 struct nilfs_root *sc_root; 133 struct nilfs_root *sc_root;
136 134
137 unsigned long sc_nblk_inc; 135 unsigned long sc_nblk_inc;
@@ -235,18 +233,16 @@ extern void nilfs_flush_segment(struct super_block *, ino_t);
235extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, 233extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
236 void **); 234 void **);
237 235
238int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, 236int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root);
239 struct nilfs_root *root); 237void nilfs_detach_log_writer(struct super_block *sb);
240extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
241 238
242/* recovery.c */ 239/* recovery.c */
243extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t, 240extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t,
244 struct buffer_head **, int); 241 struct buffer_head **, int);
245extern int nilfs_search_super_root(struct the_nilfs *, 242extern int nilfs_search_super_root(struct the_nilfs *,
246 struct nilfs_recovery_info *); 243 struct nilfs_recovery_info *);
247extern int nilfs_salvage_orphan_logs(struct the_nilfs *, 244int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, struct super_block *sb,
248 struct nilfs_sb_info *, 245 struct nilfs_recovery_info *ri);
249 struct nilfs_recovery_info *);
250extern void nilfs_dispose_segment_list(struct list_head *); 246extern void nilfs_dispose_segment_list(struct list_head *);
251 247
252#endif /* _NILFS_SEGMENT_H */ 248#endif /* _NILFS_SEGMENT_H */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1673b3d99842..062cca065195 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -43,7 +43,6 @@
43#include <linux/init.h> 43#include <linux/init.h>
44#include <linux/blkdev.h> 44#include <linux/blkdev.h>
45#include <linux/parser.h> 45#include <linux/parser.h>
46#include <linux/random.h>
47#include <linux/crc32.h> 46#include <linux/crc32.h>
48#include <linux/vfs.h> 47#include <linux/vfs.h>
49#include <linux/writeback.h> 48#include <linux/writeback.h>
@@ -72,23 +71,23 @@ struct kmem_cache *nilfs_transaction_cachep;
72struct kmem_cache *nilfs_segbuf_cachep; 71struct kmem_cache *nilfs_segbuf_cachep;
73struct kmem_cache *nilfs_btree_path_cache; 72struct kmem_cache *nilfs_btree_path_cache;
74 73
75static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount); 74static int nilfs_setup_super(struct super_block *sb, int is_mount);
76static int nilfs_remount(struct super_block *sb, int *flags, char *data); 75static int nilfs_remount(struct super_block *sb, int *flags, char *data);
77 76
78static void nilfs_set_error(struct nilfs_sb_info *sbi) 77static void nilfs_set_error(struct super_block *sb)
79{ 78{
80 struct the_nilfs *nilfs = sbi->s_nilfs; 79 struct the_nilfs *nilfs = sb->s_fs_info;
81 struct nilfs_super_block **sbp; 80 struct nilfs_super_block **sbp;
82 81
83 down_write(&nilfs->ns_sem); 82 down_write(&nilfs->ns_sem);
84 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { 83 if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) {
85 nilfs->ns_mount_state |= NILFS_ERROR_FS; 84 nilfs->ns_mount_state |= NILFS_ERROR_FS;
86 sbp = nilfs_prepare_super(sbi, 0); 85 sbp = nilfs_prepare_super(sb, 0);
87 if (likely(sbp)) { 86 if (likely(sbp)) {
88 sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); 87 sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
89 if (sbp[1]) 88 if (sbp[1])
90 sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); 89 sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS);
91 nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 90 nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
92 } 91 }
93 } 92 }
94 up_write(&nilfs->ns_sem); 93 up_write(&nilfs->ns_sem);
@@ -109,7 +108,7 @@ static void nilfs_set_error(struct nilfs_sb_info *sbi)
109void nilfs_error(struct super_block *sb, const char *function, 108void nilfs_error(struct super_block *sb, const char *function,
110 const char *fmt, ...) 109 const char *fmt, ...)
111{ 110{
112 struct nilfs_sb_info *sbi = NILFS_SB(sb); 111 struct the_nilfs *nilfs = sb->s_fs_info;
113 struct va_format vaf; 112 struct va_format vaf;
114 va_list args; 113 va_list args;
115 114
@@ -124,15 +123,15 @@ void nilfs_error(struct super_block *sb, const char *function,
124 va_end(args); 123 va_end(args);
125 124
126 if (!(sb->s_flags & MS_RDONLY)) { 125 if (!(sb->s_flags & MS_RDONLY)) {
127 nilfs_set_error(sbi); 126 nilfs_set_error(sb);
128 127
129 if (nilfs_test_opt(sbi, ERRORS_RO)) { 128 if (nilfs_test_opt(nilfs, ERRORS_RO)) {
130 printk(KERN_CRIT "Remounting filesystem read-only\n"); 129 printk(KERN_CRIT "Remounting filesystem read-only\n");
131 sb->s_flags |= MS_RDONLY; 130 sb->s_flags |= MS_RDONLY;
132 } 131 }
133 } 132 }
134 133
135 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 134 if (nilfs_test_opt(nilfs, ERRORS_PANIC))
136 panic("NILFS (device %s): panic forced after error\n", 135 panic("NILFS (device %s): panic forced after error\n",
137 sb->s_id); 136 sb->s_id);
138} 137}
@@ -189,14 +188,14 @@ void nilfs_destroy_inode(struct inode *inode)
189 call_rcu(&inode->i_rcu, nilfs_i_callback); 188 call_rcu(&inode->i_rcu, nilfs_i_callback);
190} 189}
191 190
192static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) 191static int nilfs_sync_super(struct super_block *sb, int flag)
193{ 192{
194 struct the_nilfs *nilfs = sbi->s_nilfs; 193 struct the_nilfs *nilfs = sb->s_fs_info;
195 int err; 194 int err;
196 195
197 retry: 196 retry:
198 set_buffer_dirty(nilfs->ns_sbh[0]); 197 set_buffer_dirty(nilfs->ns_sbh[0]);
199 if (nilfs_test_opt(sbi, BARRIER)) { 198 if (nilfs_test_opt(nilfs, BARRIER)) {
200 err = __sync_dirty_buffer(nilfs->ns_sbh[0], 199 err = __sync_dirty_buffer(nilfs->ns_sbh[0],
201 WRITE_SYNC | WRITE_FLUSH_FUA); 200 WRITE_SYNC | WRITE_FLUSH_FUA);
202 } else { 201 } else {
@@ -263,10 +262,10 @@ void nilfs_set_log_cursor(struct nilfs_super_block *sbp,
263 spin_unlock(&nilfs->ns_last_segment_lock); 262 spin_unlock(&nilfs->ns_last_segment_lock);
264} 263}
265 264
266struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, 265struct nilfs_super_block **nilfs_prepare_super(struct super_block *sb,
267 int flip) 266 int flip)
268{ 267{
269 struct the_nilfs *nilfs = sbi->s_nilfs; 268 struct the_nilfs *nilfs = sb->s_fs_info;
270 struct nilfs_super_block **sbp = nilfs->ns_sbp; 269 struct nilfs_super_block **sbp = nilfs->ns_sbp;
271 270
272 /* nilfs->ns_sem must be locked by the caller. */ 271 /* nilfs->ns_sem must be locked by the caller. */
@@ -276,7 +275,7 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
276 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); 275 memcpy(sbp[0], sbp[1], nilfs->ns_sbsize);
277 } else { 276 } else {
278 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", 277 printk(KERN_CRIT "NILFS: superblock broke on dev %s\n",
279 sbi->s_super->s_id); 278 sb->s_id);
280 return NULL; 279 return NULL;
281 } 280 }
282 } else if (sbp[1] && 281 } else if (sbp[1] &&
@@ -290,9 +289,9 @@ struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi,
290 return sbp; 289 return sbp;
291} 290}
292 291
293int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag) 292int nilfs_commit_super(struct super_block *sb, int flag)
294{ 293{
295 struct the_nilfs *nilfs = sbi->s_nilfs; 294 struct the_nilfs *nilfs = sb->s_fs_info;
296 struct nilfs_super_block **sbp = nilfs->ns_sbp; 295 struct nilfs_super_block **sbp = nilfs->ns_sbp;
297 time_t t; 296 time_t t;
298 297
@@ -312,27 +311,28 @@ int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag)
312 nilfs->ns_sbsize)); 311 nilfs->ns_sbsize));
313 } 312 }
314 clear_nilfs_sb_dirty(nilfs); 313 clear_nilfs_sb_dirty(nilfs);
315 return nilfs_sync_super(sbi, flag); 314 return nilfs_sync_super(sb, flag);
316} 315}
317 316
318/** 317/**
319 * nilfs_cleanup_super() - write filesystem state for cleanup 318 * nilfs_cleanup_super() - write filesystem state for cleanup
320 * @sbi: nilfs_sb_info to be unmounted or degraded to read-only 319 * @sb: super block instance to be unmounted or degraded to read-only
321 * 320 *
322 * This function restores state flags in the on-disk super block. 321 * This function restores state flags in the on-disk super block.
323 * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the 322 * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the
324 * filesystem was not clean previously. 323 * filesystem was not clean previously.
325 */ 324 */
326int nilfs_cleanup_super(struct nilfs_sb_info *sbi) 325int nilfs_cleanup_super(struct super_block *sb)
327{ 326{
327 struct the_nilfs *nilfs = sb->s_fs_info;
328 struct nilfs_super_block **sbp; 328 struct nilfs_super_block **sbp;
329 int flag = NILFS_SB_COMMIT; 329 int flag = NILFS_SB_COMMIT;
330 int ret = -EIO; 330 int ret = -EIO;
331 331
332 sbp = nilfs_prepare_super(sbi, 0); 332 sbp = nilfs_prepare_super(sb, 0);
333 if (sbp) { 333 if (sbp) {
334 sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state); 334 sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state);
335 nilfs_set_log_cursor(sbp[0], sbi->s_nilfs); 335 nilfs_set_log_cursor(sbp[0], nilfs);
336 if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { 336 if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) {
337 /* 337 /*
338 * make the "clean" flag also to the opposite 338 * make the "clean" flag also to the opposite
@@ -342,21 +342,20 @@ int nilfs_cleanup_super(struct nilfs_sb_info *sbi)
342 sbp[1]->s_state = sbp[0]->s_state; 342 sbp[1]->s_state = sbp[0]->s_state;
343 flag = NILFS_SB_COMMIT_ALL; 343 flag = NILFS_SB_COMMIT_ALL;
344 } 344 }
345 ret = nilfs_commit_super(sbi, flag); 345 ret = nilfs_commit_super(sb, flag);
346 } 346 }
347 return ret; 347 return ret;
348} 348}
349 349
350static void nilfs_put_super(struct super_block *sb) 350static void nilfs_put_super(struct super_block *sb)
351{ 351{
352 struct nilfs_sb_info *sbi = NILFS_SB(sb); 352 struct the_nilfs *nilfs = sb->s_fs_info;
353 struct the_nilfs *nilfs = sbi->s_nilfs;
354 353
355 nilfs_detach_segment_constructor(sbi); 354 nilfs_detach_log_writer(sb);
356 355
357 if (!(sb->s_flags & MS_RDONLY)) { 356 if (!(sb->s_flags & MS_RDONLY)) {
358 down_write(&nilfs->ns_sem); 357 down_write(&nilfs->ns_sem);
359 nilfs_cleanup_super(sbi); 358 nilfs_cleanup_super(sb);
360 up_write(&nilfs->ns_sem); 359 up_write(&nilfs->ns_sem);
361 } 360 }
362 361
@@ -365,15 +364,12 @@ static void nilfs_put_super(struct super_block *sb)
365 iput(nilfs->ns_dat); 364 iput(nilfs->ns_dat);
366 365
367 destroy_nilfs(nilfs); 366 destroy_nilfs(nilfs);
368 sbi->s_super = NULL;
369 sb->s_fs_info = NULL; 367 sb->s_fs_info = NULL;
370 kfree(sbi);
371} 368}
372 369
373static int nilfs_sync_fs(struct super_block *sb, int wait) 370static int nilfs_sync_fs(struct super_block *sb, int wait)
374{ 371{
375 struct nilfs_sb_info *sbi = NILFS_SB(sb); 372 struct the_nilfs *nilfs = sb->s_fs_info;
376 struct the_nilfs *nilfs = sbi->s_nilfs;
377 struct nilfs_super_block **sbp; 373 struct nilfs_super_block **sbp;
378 int err = 0; 374 int err = 0;
379 375
@@ -383,10 +379,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
383 379
384 down_write(&nilfs->ns_sem); 380 down_write(&nilfs->ns_sem);
385 if (nilfs_sb_dirty(nilfs)) { 381 if (nilfs_sb_dirty(nilfs)) {
386 sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs)); 382 sbp = nilfs_prepare_super(sb, nilfs_sb_will_flip(nilfs));
387 if (likely(sbp)) { 383 if (likely(sbp)) {
388 nilfs_set_log_cursor(sbp[0], nilfs); 384 nilfs_set_log_cursor(sbp[0], nilfs);
389 nilfs_commit_super(sbi, NILFS_SB_COMMIT); 385 nilfs_commit_super(sb, NILFS_SB_COMMIT);
390 } 386 }
391 } 387 }
392 up_write(&nilfs->ns_sem); 388 up_write(&nilfs->ns_sem);
@@ -394,10 +390,10 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
394 return err; 390 return err;
395} 391}
396 392
397int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, 393int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
398 struct nilfs_root **rootp) 394 struct nilfs_root **rootp)
399{ 395{
400 struct the_nilfs *nilfs = sbi->s_nilfs; 396 struct the_nilfs *nilfs = sb->s_fs_info;
401 struct nilfs_root *root; 397 struct nilfs_root *root;
402 struct nilfs_checkpoint *raw_cp; 398 struct nilfs_checkpoint *raw_cp;
403 struct buffer_head *bh_cp; 399 struct buffer_head *bh_cp;
@@ -426,7 +422,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt,
426 goto failed; 422 goto failed;
427 } 423 }
428 424
429 err = nilfs_ifile_read(sbi->s_super, root, nilfs->ns_inode_size, 425 err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size,
430 &raw_cp->cp_ifile_inode, &root->ifile); 426 &raw_cp->cp_ifile_inode, &root->ifile);
431 if (err) 427 if (err)
432 goto failed_bh; 428 goto failed_bh;
@@ -450,8 +446,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt,
450 446
451static int nilfs_freeze(struct super_block *sb) 447static int nilfs_freeze(struct super_block *sb)
452{ 448{
453 struct nilfs_sb_info *sbi = NILFS_SB(sb); 449 struct the_nilfs *nilfs = sb->s_fs_info;
454 struct the_nilfs *nilfs = sbi->s_nilfs;
455 int err; 450 int err;
456 451
457 if (sb->s_flags & MS_RDONLY) 452 if (sb->s_flags & MS_RDONLY)
@@ -459,21 +454,20 @@ static int nilfs_freeze(struct super_block *sb)
459 454
460 /* Mark super block clean */ 455 /* Mark super block clean */
461 down_write(&nilfs->ns_sem); 456 down_write(&nilfs->ns_sem);
462 err = nilfs_cleanup_super(sbi); 457 err = nilfs_cleanup_super(sb);
463 up_write(&nilfs->ns_sem); 458 up_write(&nilfs->ns_sem);
464 return err; 459 return err;
465} 460}
466 461
467static int nilfs_unfreeze(struct super_block *sb) 462static int nilfs_unfreeze(struct super_block *sb)
468{ 463{
469 struct nilfs_sb_info *sbi = NILFS_SB(sb); 464 struct the_nilfs *nilfs = sb->s_fs_info;
470 struct the_nilfs *nilfs = sbi->s_nilfs;
471 465
472 if (sb->s_flags & MS_RDONLY) 466 if (sb->s_flags & MS_RDONLY)
473 return 0; 467 return 0;
474 468
475 down_write(&nilfs->ns_sem); 469 down_write(&nilfs->ns_sem);
476 nilfs_setup_super(sbi, false); 470 nilfs_setup_super(sb, false);
477 up_write(&nilfs->ns_sem); 471 up_write(&nilfs->ns_sem);
478 return 0; 472 return 0;
479} 473}
@@ -530,22 +524,22 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
530static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) 524static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
531{ 525{
532 struct super_block *sb = vfs->mnt_sb; 526 struct super_block *sb = vfs->mnt_sb;
533 struct nilfs_sb_info *sbi = NILFS_SB(sb); 527 struct the_nilfs *nilfs = sb->s_fs_info;
534 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; 528 struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root;
535 529
536 if (!nilfs_test_opt(sbi, BARRIER)) 530 if (!nilfs_test_opt(nilfs, BARRIER))
537 seq_puts(seq, ",nobarrier"); 531 seq_puts(seq, ",nobarrier");
538 if (root->cno != NILFS_CPTREE_CURRENT_CNO) 532 if (root->cno != NILFS_CPTREE_CURRENT_CNO)
539 seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); 533 seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno);
540 if (nilfs_test_opt(sbi, ERRORS_PANIC)) 534 if (nilfs_test_opt(nilfs, ERRORS_PANIC))
541 seq_puts(seq, ",errors=panic"); 535 seq_puts(seq, ",errors=panic");
542 if (nilfs_test_opt(sbi, ERRORS_CONT)) 536 if (nilfs_test_opt(nilfs, ERRORS_CONT))
543 seq_puts(seq, ",errors=continue"); 537 seq_puts(seq, ",errors=continue");
544 if (nilfs_test_opt(sbi, STRICT_ORDER)) 538 if (nilfs_test_opt(nilfs, STRICT_ORDER))
545 seq_puts(seq, ",order=strict"); 539 seq_puts(seq, ",order=strict");
546 if (nilfs_test_opt(sbi, NORECOVERY)) 540 if (nilfs_test_opt(nilfs, NORECOVERY))
547 seq_puts(seq, ",norecovery"); 541 seq_puts(seq, ",norecovery");
548 if (nilfs_test_opt(sbi, DISCARD)) 542 if (nilfs_test_opt(nilfs, DISCARD))
549 seq_puts(seq, ",discard"); 543 seq_puts(seq, ",discard");
550 544
551 return 0; 545 return 0;
@@ -594,7 +588,7 @@ static match_table_t tokens = {
594 588
595static int parse_options(char *options, struct super_block *sb, int is_remount) 589static int parse_options(char *options, struct super_block *sb, int is_remount)
596{ 590{
597 struct nilfs_sb_info *sbi = NILFS_SB(sb); 591 struct the_nilfs *nilfs = sb->s_fs_info;
598 char *p; 592 char *p;
599 substring_t args[MAX_OPT_ARGS]; 593 substring_t args[MAX_OPT_ARGS];
600 594
@@ -609,29 +603,29 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
609 token = match_token(p, tokens, args); 603 token = match_token(p, tokens, args);
610 switch (token) { 604 switch (token) {
611 case Opt_barrier: 605 case Opt_barrier:
612 nilfs_set_opt(sbi, BARRIER); 606 nilfs_set_opt(nilfs, BARRIER);
613 break; 607 break;
614 case Opt_nobarrier: 608 case Opt_nobarrier:
615 nilfs_clear_opt(sbi, BARRIER); 609 nilfs_clear_opt(nilfs, BARRIER);
616 break; 610 break;
617 case Opt_order: 611 case Opt_order:
618 if (strcmp(args[0].from, "relaxed") == 0) 612 if (strcmp(args[0].from, "relaxed") == 0)
619 /* Ordered data semantics */ 613 /* Ordered data semantics */
620 nilfs_clear_opt(sbi, STRICT_ORDER); 614 nilfs_clear_opt(nilfs, STRICT_ORDER);
621 else if (strcmp(args[0].from, "strict") == 0) 615 else if (strcmp(args[0].from, "strict") == 0)
622 /* Strict in-order semantics */ 616 /* Strict in-order semantics */
623 nilfs_set_opt(sbi, STRICT_ORDER); 617 nilfs_set_opt(nilfs, STRICT_ORDER);
624 else 618 else
625 return 0; 619 return 0;
626 break; 620 break;
627 case Opt_err_panic: 621 case Opt_err_panic:
628 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_PANIC); 622 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC);
629 break; 623 break;
630 case Opt_err_ro: 624 case Opt_err_ro:
631 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_RO); 625 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO);
632 break; 626 break;
633 case Opt_err_cont: 627 case Opt_err_cont:
634 nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); 628 nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT);
635 break; 629 break;
636 case Opt_snapshot: 630 case Opt_snapshot:
637 if (is_remount) { 631 if (is_remount) {
@@ -642,13 +636,13 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
642 } 636 }
643 break; 637 break;
644 case Opt_norecovery: 638 case Opt_norecovery:
645 nilfs_set_opt(sbi, NORECOVERY); 639 nilfs_set_opt(nilfs, NORECOVERY);
646 break; 640 break;
647 case Opt_discard: 641 case Opt_discard:
648 nilfs_set_opt(sbi, DISCARD); 642 nilfs_set_opt(nilfs, DISCARD);
649 break; 643 break;
650 case Opt_nodiscard: 644 case Opt_nodiscard:
651 nilfs_clear_opt(sbi, DISCARD); 645 nilfs_clear_opt(nilfs, DISCARD);
652 break; 646 break;
653 default: 647 default:
654 printk(KERN_ERR 648 printk(KERN_ERR
@@ -660,22 +654,24 @@ static int parse_options(char *options, struct super_block *sb, int is_remount)
660} 654}
661 655
662static inline void 656static inline void
663nilfs_set_default_options(struct nilfs_sb_info *sbi, 657nilfs_set_default_options(struct super_block *sb,
664 struct nilfs_super_block *sbp) 658 struct nilfs_super_block *sbp)
665{ 659{
666 sbi->s_mount_opt = 660 struct the_nilfs *nilfs = sb->s_fs_info;
661
662 nilfs->ns_mount_opt =
667 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; 663 NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER;
668} 664}
669 665
670static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount) 666static int nilfs_setup_super(struct super_block *sb, int is_mount)
671{ 667{
672 struct the_nilfs *nilfs = sbi->s_nilfs; 668 struct the_nilfs *nilfs = sb->s_fs_info;
673 struct nilfs_super_block **sbp; 669 struct nilfs_super_block **sbp;
674 int max_mnt_count; 670 int max_mnt_count;
675 int mnt_count; 671 int mnt_count;
676 672
677 /* nilfs->ns_sem must be locked by the caller. */ 673 /* nilfs->ns_sem must be locked by the caller. */
678 sbp = nilfs_prepare_super(sbi, 0); 674 sbp = nilfs_prepare_super(sb, 0);
679 if (!sbp) 675 if (!sbp)
680 return -EIO; 676 return -EIO;
681 677
@@ -706,7 +702,7 @@ skip_mount_setup:
706 /* synchronize sbp[1] with sbp[0] */ 702 /* synchronize sbp[1] with sbp[0] */
707 if (sbp[1]) 703 if (sbp[1])
708 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); 704 memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
709 return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); 705 return nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
710} 706}
711 707
712struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, 708struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
@@ -727,7 +723,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,
727 struct nilfs_super_block *sbp, 723 struct nilfs_super_block *sbp,
728 char *data) 724 char *data)
729{ 725{
730 struct nilfs_sb_info *sbi = NILFS_SB(sb); 726 struct the_nilfs *nilfs = sb->s_fs_info;
731 727
732 sb->s_magic = le16_to_cpu(sbp->s_magic); 728 sb->s_magic = le16_to_cpu(sbp->s_magic);
733 729
@@ -736,12 +732,12 @@ int nilfs_store_magic_and_option(struct super_block *sb,
736 sb->s_flags |= MS_NOATIME; 732 sb->s_flags |= MS_NOATIME;
737#endif 733#endif
738 734
739 nilfs_set_default_options(sbi, sbp); 735 nilfs_set_default_options(sb, sbp);
740 736
741 sbi->s_resuid = le16_to_cpu(sbp->s_def_resuid); 737 nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid);
742 sbi->s_resgid = le16_to_cpu(sbp->s_def_resgid); 738 nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid);
743 sbi->s_interval = le32_to_cpu(sbp->s_c_interval); 739 nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval);
744 sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); 740 nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max);
745 741
746 return !parse_options(data, sb, 0) ? -EINVAL : 0 ; 742 return !parse_options(data, sb, 0) ? -EINVAL : 0 ;
747} 743}
@@ -822,7 +818,7 @@ static int nilfs_get_root_dentry(struct super_block *sb,
822static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, 818static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
823 struct dentry **root_dentry) 819 struct dentry **root_dentry)
824{ 820{
825 struct the_nilfs *nilfs = NILFS_SB(s)->s_nilfs; 821 struct the_nilfs *nilfs = s->s_fs_info;
826 struct nilfs_root *root; 822 struct nilfs_root *root;
827 int ret; 823 int ret;
828 824
@@ -840,7 +836,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
840 goto out; 836 goto out;
841 } 837 }
842 838
843 ret = nilfs_attach_checkpoint(NILFS_SB(s), cno, false, &root); 839 ret = nilfs_attach_checkpoint(s, cno, false, &root);
844 if (ret) { 840 if (ret) {
845 printk(KERN_ERR "NILFS: error loading snapshot " 841 printk(KERN_ERR "NILFS: error loading snapshot "
846 "(checkpoint number=%llu).\n", 842 "(checkpoint number=%llu).\n",
@@ -874,7 +870,7 @@ static int nilfs_try_to_shrink_tree(struct dentry *root_dentry)
874 870
875int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) 871int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
876{ 872{
877 struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; 873 struct the_nilfs *nilfs = sb->s_fs_info;
878 struct nilfs_root *root; 874 struct nilfs_root *root;
879 struct inode *inode; 875 struct inode *inode;
880 struct dentry *dentry; 876 struct dentry *dentry;
@@ -887,7 +883,7 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
887 return true; /* protect recent checkpoints */ 883 return true; /* protect recent checkpoints */
888 884
889 ret = false; 885 ret = false;
890 root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); 886 root = nilfs_lookup_root(nilfs, cno);
891 if (root) { 887 if (root) {
892 inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); 888 inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO);
893 if (inode) { 889 if (inode) {
@@ -917,43 +913,21 @@ static int
917nilfs_fill_super(struct super_block *sb, void *data, int silent) 913nilfs_fill_super(struct super_block *sb, void *data, int silent)
918{ 914{
919 struct the_nilfs *nilfs; 915 struct the_nilfs *nilfs;
920 struct nilfs_sb_info *sbi;
921 struct nilfs_root *fsroot; 916 struct nilfs_root *fsroot;
922 struct backing_dev_info *bdi; 917 struct backing_dev_info *bdi;
923 __u64 cno; 918 __u64 cno;
924 int err; 919 int err;
925 920
926 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 921 nilfs = alloc_nilfs(sb->s_bdev);
927 if (!sbi) 922 if (!nilfs)
928 return -ENOMEM; 923 return -ENOMEM;
929 924
930 sb->s_fs_info = sbi; 925 sb->s_fs_info = nilfs;
931 sbi->s_super = sb;
932
933 nilfs = alloc_nilfs(sb->s_bdev);
934 if (!nilfs) {
935 err = -ENOMEM;
936 goto failed_sbi;
937 }
938 sbi->s_nilfs = nilfs;
939 926
940 err = init_nilfs(nilfs, sbi, (char *)data); 927 err = init_nilfs(nilfs, sb, (char *)data);
941 if (err) 928 if (err)
942 goto failed_nilfs; 929 goto failed_nilfs;
943 930
944 spin_lock_init(&sbi->s_inode_lock);
945 INIT_LIST_HEAD(&sbi->s_dirty_files);
946
947 /*
948 * Following initialization is overlapped because
949 * nilfs_sb_info structure has been cleared at the beginning.
950 * But we reserve them to keep our interest and make ready
951 * for the future change.
952 */
953 get_random_bytes(&sbi->s_next_generation,
954 sizeof(sbi->s_next_generation));
955 spin_lock_init(&sbi->s_next_gen_lock);
956
957 sb->s_op = &nilfs_sops; 931 sb->s_op = &nilfs_sops;
958 sb->s_export_op = &nilfs_export_ops; 932 sb->s_export_op = &nilfs_export_ops;
959 sb->s_root = NULL; 933 sb->s_root = NULL;
@@ -962,12 +936,12 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
962 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; 936 bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info;
963 sb->s_bdi = bdi ? : &default_backing_dev_info; 937 sb->s_bdi = bdi ? : &default_backing_dev_info;
964 938
965 err = load_nilfs(nilfs, sbi); 939 err = load_nilfs(nilfs, sb);
966 if (err) 940 if (err)
967 goto failed_nilfs; 941 goto failed_nilfs;
968 942
969 cno = nilfs_last_cno(nilfs); 943 cno = nilfs_last_cno(nilfs);
970 err = nilfs_attach_checkpoint(sbi, cno, true, &fsroot); 944 err = nilfs_attach_checkpoint(sb, cno, true, &fsroot);
971 if (err) { 945 if (err) {
972 printk(KERN_ERR "NILFS: error loading last checkpoint " 946 printk(KERN_ERR "NILFS: error loading last checkpoint "
973 "(checkpoint number=%llu).\n", (unsigned long long)cno); 947 "(checkpoint number=%llu).\n", (unsigned long long)cno);
@@ -975,7 +949,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
975 } 949 }
976 950
977 if (!(sb->s_flags & MS_RDONLY)) { 951 if (!(sb->s_flags & MS_RDONLY)) {
978 err = nilfs_attach_segment_constructor(sbi, fsroot); 952 err = nilfs_attach_log_writer(sb, fsroot);
979 if (err) 953 if (err)
980 goto failed_checkpoint; 954 goto failed_checkpoint;
981 } 955 }
@@ -988,14 +962,14 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
988 962
989 if (!(sb->s_flags & MS_RDONLY)) { 963 if (!(sb->s_flags & MS_RDONLY)) {
990 down_write(&nilfs->ns_sem); 964 down_write(&nilfs->ns_sem);
991 nilfs_setup_super(sbi, true); 965 nilfs_setup_super(sb, true);
992 up_write(&nilfs->ns_sem); 966 up_write(&nilfs->ns_sem);
993 } 967 }
994 968
995 return 0; 969 return 0;
996 970
997 failed_segctor: 971 failed_segctor:
998 nilfs_detach_segment_constructor(sbi); 972 nilfs_detach_log_writer(sb);
999 973
1000 failed_checkpoint: 974 failed_checkpoint:
1001 nilfs_put_root(fsroot); 975 nilfs_put_root(fsroot);
@@ -1007,23 +981,18 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
1007 981
1008 failed_nilfs: 982 failed_nilfs:
1009 destroy_nilfs(nilfs); 983 destroy_nilfs(nilfs);
1010
1011 failed_sbi:
1012 sb->s_fs_info = NULL;
1013 kfree(sbi);
1014 return err; 984 return err;
1015} 985}
1016 986
1017static int nilfs_remount(struct super_block *sb, int *flags, char *data) 987static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1018{ 988{
1019 struct nilfs_sb_info *sbi = NILFS_SB(sb); 989 struct the_nilfs *nilfs = sb->s_fs_info;
1020 struct the_nilfs *nilfs = sbi->s_nilfs;
1021 unsigned long old_sb_flags; 990 unsigned long old_sb_flags;
1022 unsigned long old_mount_opt; 991 unsigned long old_mount_opt;
1023 int err; 992 int err;
1024 993
1025 old_sb_flags = sb->s_flags; 994 old_sb_flags = sb->s_flags;
1026 old_mount_opt = sbi->s_mount_opt; 995 old_mount_opt = nilfs->ns_mount_opt;
1027 996
1028 if (!parse_options(data, sb, 1)) { 997 if (!parse_options(data, sb, 1)) {
1029 err = -EINVAL; 998 err = -EINVAL;
@@ -1043,8 +1012,8 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1043 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 1012 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
1044 goto out; 1013 goto out;
1045 if (*flags & MS_RDONLY) { 1014 if (*flags & MS_RDONLY) {
1046 /* Shutting down the segment constructor */ 1015 /* Shutting down log writer */
1047 nilfs_detach_segment_constructor(sbi); 1016 nilfs_detach_log_writer(sb);
1048 sb->s_flags |= MS_RDONLY; 1017 sb->s_flags |= MS_RDONLY;
1049 1018
1050 /* 1019 /*
@@ -1052,7 +1021,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1052 * the RDONLY flag and then mark the partition as valid again. 1021 * the RDONLY flag and then mark the partition as valid again.
1053 */ 1022 */
1054 down_write(&nilfs->ns_sem); 1023 down_write(&nilfs->ns_sem);
1055 nilfs_cleanup_super(sbi); 1024 nilfs_cleanup_super(sb);
1056 up_write(&nilfs->ns_sem); 1025 up_write(&nilfs->ns_sem);
1057 } else { 1026 } else {
1058 __u64 features; 1027 __u64 features;
@@ -1079,12 +1048,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1079 sb->s_flags &= ~MS_RDONLY; 1048 sb->s_flags &= ~MS_RDONLY;
1080 1049
1081 root = NILFS_I(sb->s_root->d_inode)->i_root; 1050 root = NILFS_I(sb->s_root->d_inode)->i_root;
1082 err = nilfs_attach_segment_constructor(sbi, root); 1051 err = nilfs_attach_log_writer(sb, root);
1083 if (err) 1052 if (err)
1084 goto restore_opts; 1053 goto restore_opts;
1085 1054
1086 down_write(&nilfs->ns_sem); 1055 down_write(&nilfs->ns_sem);
1087 nilfs_setup_super(sbi, true); 1056 nilfs_setup_super(sb, true);
1088 up_write(&nilfs->ns_sem); 1057 up_write(&nilfs->ns_sem);
1089 } 1058 }
1090 out: 1059 out:
@@ -1092,13 +1061,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
1092 1061
1093 restore_opts: 1062 restore_opts:
1094 sb->s_flags = old_sb_flags; 1063 sb->s_flags = old_sb_flags;
1095 sbi->s_mount_opt = old_mount_opt; 1064 nilfs->ns_mount_opt = old_mount_opt;
1096 return err; 1065 return err;
1097} 1066}
1098 1067
1099struct nilfs_super_data { 1068struct nilfs_super_data {
1100 struct block_device *bdev; 1069 struct block_device *bdev;
1101 struct nilfs_sb_info *sbi;
1102 __u64 cno; 1070 __u64 cno;
1103 int flags; 1071 int flags;
1104}; 1072};
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index ad4ac607cf57..d2acd1a651f3 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -25,6 +25,7 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/blkdev.h> 26#include <linux/blkdev.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/random.h>
28#include <linux/crc32.h> 29#include <linux/crc32.h>
29#include "nilfs.h" 30#include "nilfs.h"
30#include "segment.h" 31#include "segment.h"
@@ -75,7 +76,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
75 nilfs->ns_bdev = bdev; 76 nilfs->ns_bdev = bdev;
76 atomic_set(&nilfs->ns_ndirtyblks, 0); 77 atomic_set(&nilfs->ns_ndirtyblks, 0);
77 init_rwsem(&nilfs->ns_sem); 78 init_rwsem(&nilfs->ns_sem);
79 INIT_LIST_HEAD(&nilfs->ns_dirty_files);
78 INIT_LIST_HEAD(&nilfs->ns_gc_inodes); 80 INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
81 spin_lock_init(&nilfs->ns_inode_lock);
82 spin_lock_init(&nilfs->ns_next_gen_lock);
79 spin_lock_init(&nilfs->ns_last_segment_lock); 83 spin_lock_init(&nilfs->ns_last_segment_lock);
80 nilfs->ns_cptree = RB_ROOT; 84 nilfs->ns_cptree = RB_ROOT;
81 spin_lock_init(&nilfs->ns_cptree_lock); 85 spin_lock_init(&nilfs->ns_cptree_lock);
@@ -197,16 +201,16 @@ static int nilfs_store_log_cursor(struct the_nilfs *nilfs,
197/** 201/**
198 * load_nilfs - load and recover the nilfs 202 * load_nilfs - load and recover the nilfs
199 * @nilfs: the_nilfs structure to be released 203 * @nilfs: the_nilfs structure to be released
200 * @sbi: nilfs_sb_info used to recover past segment 204 * @sb: super block instance used to recover past segment
201 * 205 *
202 * load_nilfs() searches and load the latest super root, 206 * load_nilfs() searches and load the latest super root,
203 * attaches the last segment, and does recovery if needed. 207 * attaches the last segment, and does recovery if needed.
204 * The caller must call this exclusively for simultaneous mounts. 208 * The caller must call this exclusively for simultaneous mounts.
205 */ 209 */
206int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) 210int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
207{ 211{
208 struct nilfs_recovery_info ri; 212 struct nilfs_recovery_info ri;
209 unsigned int s_flags = sbi->s_super->s_flags; 213 unsigned int s_flags = sb->s_flags;
210 int really_read_only = bdev_read_only(nilfs->ns_bdev); 214 int really_read_only = bdev_read_only(nilfs->ns_bdev);
211 int valid_fs = nilfs_valid_fs(nilfs); 215 int valid_fs = nilfs_valid_fs(nilfs);
212 int err; 216 int err;
@@ -271,7 +275,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
271 goto scan_error; 275 goto scan_error;
272 } 276 }
273 277
274 err = nilfs_load_super_root(nilfs, sbi->s_super, ri.ri_super_root); 278 err = nilfs_load_super_root(nilfs, sb, ri.ri_super_root);
275 if (unlikely(err)) { 279 if (unlikely(err)) {
276 printk(KERN_ERR "NILFS: error loading super root.\n"); 280 printk(KERN_ERR "NILFS: error loading super root.\n");
277 goto failed; 281 goto failed;
@@ -283,7 +287,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
283 if (s_flags & MS_RDONLY) { 287 if (s_flags & MS_RDONLY) {
284 __u64 features; 288 __u64 features;
285 289
286 if (nilfs_test_opt(sbi, NORECOVERY)) { 290 if (nilfs_test_opt(nilfs, NORECOVERY)) {
287 printk(KERN_INFO "NILFS: norecovery option specified. " 291 printk(KERN_INFO "NILFS: norecovery option specified. "
288 "skipping roll-forward recovery\n"); 292 "skipping roll-forward recovery\n");
289 goto skip_recovery; 293 goto skip_recovery;
@@ -304,21 +308,21 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
304 err = -EROFS; 308 err = -EROFS;
305 goto failed_unload; 309 goto failed_unload;
306 } 310 }
307 sbi->s_super->s_flags &= ~MS_RDONLY; 311 sb->s_flags &= ~MS_RDONLY;
308 } else if (nilfs_test_opt(sbi, NORECOVERY)) { 312 } else if (nilfs_test_opt(nilfs, NORECOVERY)) {
309 printk(KERN_ERR "NILFS: recovery cancelled because norecovery " 313 printk(KERN_ERR "NILFS: recovery cancelled because norecovery "
310 "option was specified for a read/write mount\n"); 314 "option was specified for a read/write mount\n");
311 err = -EINVAL; 315 err = -EINVAL;
312 goto failed_unload; 316 goto failed_unload;
313 } 317 }
314 318
315 err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri); 319 err = nilfs_salvage_orphan_logs(nilfs, sb, &ri);
316 if (err) 320 if (err)
317 goto failed_unload; 321 goto failed_unload;
318 322
319 down_write(&nilfs->ns_sem); 323 down_write(&nilfs->ns_sem);
320 nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */ 324 nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */
321 err = nilfs_cleanup_super(sbi); 325 err = nilfs_cleanup_super(sb);
322 up_write(&nilfs->ns_sem); 326 up_write(&nilfs->ns_sem);
323 327
324 if (err) { 328 if (err) {
@@ -330,7 +334,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
330 334
331 skip_recovery: 335 skip_recovery:
332 nilfs_clear_recovery_info(&ri); 336 nilfs_clear_recovery_info(&ri);
333 sbi->s_super->s_flags = s_flags; 337 sb->s_flags = s_flags;
334 return 0; 338 return 0;
335 339
336 scan_error: 340 scan_error:
@@ -344,7 +348,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
344 348
345 failed: 349 failed:
346 nilfs_clear_recovery_info(&ri); 350 nilfs_clear_recovery_info(&ri);
347 sbi->s_super->s_flags = s_flags; 351 sb->s_flags = s_flags;
348 return err; 352 return err;
349} 353}
350 354
@@ -475,10 +479,13 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
475 return -EIO; 479 return -EIO;
476 } 480 }
477 printk(KERN_WARNING 481 printk(KERN_WARNING
478 "NILFS warning: unable to read primary superblock\n"); 482 "NILFS warning: unable to read primary superblock "
479 } else if (!sbp[1]) 483 "(blocksize = %d)\n", blocksize);
484 } else if (!sbp[1]) {
480 printk(KERN_WARNING 485 printk(KERN_WARNING
481 "NILFS warning: unable to read secondary superblock\n"); 486 "NILFS warning: unable to read secondary superblock "
487 "(blocksize = %d)\n", blocksize);
488 }
482 489
483 /* 490 /*
484 * Compare two super blocks and set 1 in swp if the secondary 491 * Compare two super blocks and set 1 in swp if the secondary
@@ -505,7 +512,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
505 512
506 if (!valid[!swp]) 513 if (!valid[!swp])
507 printk(KERN_WARNING "NILFS warning: broken superblock. " 514 printk(KERN_WARNING "NILFS warning: broken superblock. "
508 "using spare superblock.\n"); 515 "using spare superblock (blocksize = %d).\n", blocksize);
509 if (swp) 516 if (swp)
510 nilfs_swap_super_block(nilfs); 517 nilfs_swap_super_block(nilfs);
511 518
@@ -519,7 +526,6 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
519/** 526/**
520 * init_nilfs - initialize a NILFS instance. 527 * init_nilfs - initialize a NILFS instance.
521 * @nilfs: the_nilfs structure 528 * @nilfs: the_nilfs structure
522 * @sbi: nilfs_sb_info
523 * @sb: super block 529 * @sb: super block
524 * @data: mount options 530 * @data: mount options
525 * 531 *
@@ -530,9 +536,8 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
530 * Return Value: On success, 0 is returned. On error, a negative error 536 * Return Value: On success, 0 is returned. On error, a negative error
531 * code is returned. 537 * code is returned.
532 */ 538 */
533int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) 539int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
534{ 540{
535 struct super_block *sb = sbi->s_super;
536 struct nilfs_super_block *sbp; 541 struct nilfs_super_block *sbp;
537 int blocksize; 542 int blocksize;
538 int err; 543 int err;
@@ -588,6 +593,9 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data)
588 nilfs->ns_blocksize_bits = sb->s_blocksize_bits; 593 nilfs->ns_blocksize_bits = sb->s_blocksize_bits;
589 nilfs->ns_blocksize = blocksize; 594 nilfs->ns_blocksize = blocksize;
590 595
596 get_random_bytes(&nilfs->ns_next_generation,
597 sizeof(nilfs->ns_next_generation));
598
591 err = nilfs_store_disk_layout(nilfs, sbp); 599 err = nilfs_store_disk_layout(nilfs, sbp);
592 if (err) 600 if (err)
593 goto failed_sbh; 601 goto failed_sbh;
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index fd85e4c05c6b..f4968145c2a3 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -31,7 +31,8 @@
31#include <linux/blkdev.h> 31#include <linux/blkdev.h>
32#include <linux/backing-dev.h> 32#include <linux/backing-dev.h>
33#include <linux/slab.h> 33#include <linux/slab.h>
34#include "sb.h" 34
35struct nilfs_sc_info;
35 36
36/* the_nilfs struct */ 37/* the_nilfs struct */
37enum { 38enum {
@@ -65,13 +66,23 @@ enum {
65 * @ns_last_cno: checkpoint number of the latest segment 66 * @ns_last_cno: checkpoint number of the latest segment
66 * @ns_prot_seq: least sequence number of segments which must not be reclaimed 67 * @ns_prot_seq: least sequence number of segments which must not be reclaimed
67 * @ns_prev_seq: base sequence number used to decide if advance log cursor 68 * @ns_prev_seq: base sequence number used to decide if advance log cursor
68 * @ns_segctor_sem: segment constructor semaphore 69 * @ns_writer: log writer
70 * @ns_segctor_sem: semaphore protecting log write
69 * @ns_dat: DAT file inode 71 * @ns_dat: DAT file inode
70 * @ns_cpfile: checkpoint file inode 72 * @ns_cpfile: checkpoint file inode
71 * @ns_sufile: segusage file inode 73 * @ns_sufile: segusage file inode
72 * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) 74 * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root)
73 * @ns_cptree_lock: lock protecting @ns_cptree 75 * @ns_cptree_lock: lock protecting @ns_cptree
76 * @ns_dirty_files: list of dirty files
77 * @ns_inode_lock: lock protecting @ns_dirty_files
74 * @ns_gc_inodes: dummy inodes to keep live blocks 78 * @ns_gc_inodes: dummy inodes to keep live blocks
79 * @ns_next_generation: next generation number for inodes
80 * @ns_next_gen_lock: lock protecting @ns_next_generation
81 * @ns_mount_opt: mount options
82 * @ns_resuid: uid for reserved blocks
83 * @ns_resgid: gid for reserved blocks
84 * @ns_interval: checkpoint creation interval
85 * @ns_watermark: watermark for the number of dirty buffers
75 * @ns_blocksize_bits: bit length of block size 86 * @ns_blocksize_bits: bit length of block size
76 * @ns_blocksize: block size 87 * @ns_blocksize: block size
77 * @ns_nsegments: number of segments in filesystem 88 * @ns_nsegments: number of segments in filesystem
@@ -131,6 +142,7 @@ struct the_nilfs {
131 u64 ns_prot_seq; 142 u64 ns_prot_seq;
132 u64 ns_prev_seq; 143 u64 ns_prev_seq;
133 144
145 struct nilfs_sc_info *ns_writer;
134 struct rw_semaphore ns_segctor_sem; 146 struct rw_semaphore ns_segctor_sem;
135 147
136 /* 148 /*
@@ -145,9 +157,25 @@ struct the_nilfs {
145 struct rb_root ns_cptree; 157 struct rb_root ns_cptree;
146 spinlock_t ns_cptree_lock; 158 spinlock_t ns_cptree_lock;
147 159
160 /* Dirty inode list */
161 struct list_head ns_dirty_files;
162 spinlock_t ns_inode_lock;
163
148 /* GC inode list */ 164 /* GC inode list */
149 struct list_head ns_gc_inodes; 165 struct list_head ns_gc_inodes;
150 166
167 /* Inode allocator */
168 u32 ns_next_generation;
169 spinlock_t ns_next_gen_lock;
170
171 /* Mount options */
172 unsigned long ns_mount_opt;
173
174 uid_t ns_resuid;
175 gid_t ns_resgid;
176 unsigned long ns_interval;
177 unsigned long ns_watermark;
178
151 /* Disk layout information (static) */ 179 /* Disk layout information (static) */
152 unsigned int ns_blocksize_bits; 180 unsigned int ns_blocksize_bits;
153 unsigned int ns_blocksize; 181 unsigned int ns_blocksize;
@@ -180,6 +208,20 @@ THE_NILFS_FNS(DISCONTINUED, discontinued)
180THE_NILFS_FNS(GC_RUNNING, gc_running) 208THE_NILFS_FNS(GC_RUNNING, gc_running)
181THE_NILFS_FNS(SB_DIRTY, sb_dirty) 209THE_NILFS_FNS(SB_DIRTY, sb_dirty)
182 210
211/*
212 * Mount option operations
213 */
214#define nilfs_clear_opt(nilfs, opt) \
215 do { (nilfs)->ns_mount_opt &= ~NILFS_MOUNT_##opt; } while (0)
216#define nilfs_set_opt(nilfs, opt) \
217 do { (nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt; } while (0)
218#define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt)
219#define nilfs_write_opt(nilfs, mask, opt) \
220 do { (nilfs)->ns_mount_opt = \
221 (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \
222 NILFS_MOUNT_##opt); \
223 } while (0)
224
183/** 225/**
184 * struct nilfs_root - nilfs root object 226 * struct nilfs_root - nilfs root object
185 * @cno: checkpoint number 227 * @cno: checkpoint number
@@ -224,15 +266,14 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs)
224void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); 266void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
225struct the_nilfs *alloc_nilfs(struct block_device *bdev); 267struct the_nilfs *alloc_nilfs(struct block_device *bdev);
226void destroy_nilfs(struct the_nilfs *nilfs); 268void destroy_nilfs(struct the_nilfs *nilfs);
227int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); 269int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data);
228int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); 270int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb);
229int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); 271int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t);
230int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); 272int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
231struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); 273struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno);
232struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, 274struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs,
233 __u64 cno); 275 __u64 cno);
234void nilfs_put_root(struct nilfs_root *root); 276void nilfs_put_root(struct nilfs_root *root);
235struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
236int nilfs_near_disk_full(struct the_nilfs *); 277int nilfs_near_disk_full(struct the_nilfs *);
237void nilfs_fall_back_super_block(struct the_nilfs *); 278void nilfs_fall_back_super_block(struct the_nilfs *);
238void nilfs_swap_super_block(struct the_nilfs *); 279void nilfs_swap_super_block(struct the_nilfs *);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8b61220cffc5..6b1305dc26c0 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -876,7 +876,7 @@ SYSCALL_ALIAS(sys_fanotify_mark, SyS_fanotify_mark);
876#endif 876#endif
877 877
878/* 878/*
879 * fanotify_user_setup - Our initialization function. Note that we cannnot return 879 * fanotify_user_setup - Our initialization function. Note that we cannot return
880 * error because we have compiled-in VFS hooks. So an (unlikely) failure here 880 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
881 * must result in panic(). 881 * must result in panic().
882 */ 882 */
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 4cd5d5d78f9f..bd46e7c8a0ef 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -841,7 +841,7 @@ out:
841} 841}
842 842
843/* 843/*
844 * inotify_user_setup - Our initialization function. Note that we cannnot return 844 * inotify_user_setup - Our initialization function. Note that we cannot return
845 * error because we have compiled-in VFS hooks. So an (unlikely) failure here 845 * error because we have compiled-in VFS hooks. So an (unlikely) failure here
846 * must result in panic(). 846 * must result in panic().
847 */ 847 */
diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile
index 4ff028fcfd6e..30206b238433 100644
--- a/fs/ntfs/Makefile
+++ b/fs/ntfs/Makefile
@@ -2,18 +2,13 @@
2 2
3obj-$(CONFIG_NTFS_FS) += ntfs.o 3obj-$(CONFIG_NTFS_FS) += ntfs.o
4 4
5ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ 5ntfs-y := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \
6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ 6 index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \
7 unistr.o upcase.o 7 unistr.o upcase.o
8 8
9EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.30\" 9ntfs-$(CONFIG_NTFS_RW) += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
10 10
11ifeq ($(CONFIG_NTFS_DEBUG),y) 11ccflags-y := -DNTFS_VERSION=\"2.1.30\"
12EXTRA_CFLAGS += -DDEBUG 12ccflags-$(CONFIG_NTFS_DEBUG) += -DDEBUG
13endif 13ccflags-$(CONFIG_NTFS_RW) += -DNTFS_RW
14 14
15ifeq ($(CONFIG_NTFS_RW),y)
16EXTRA_CFLAGS += -DNTFS_RW
17
18ntfs-objs += bitmap.o lcnalloc.o logfile.o quota.o usnjrnl.o
19endif
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 07d9fd854350..d8a0313e99e6 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -1,6 +1,6 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3EXTRA_CFLAGS += -DCATCH_BH_JBD_RACES 3ccflags-y += -DCATCH_BH_JBD_RACES
4 4
5obj-$(CONFIG_OCFS2_FS) += \ 5obj-$(CONFIG_OCFS2_FS) += \
6 ocfs2.o \ 6 ocfs2.o \
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 704f6b1742f3..90f2729b7a5b 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -497,7 +497,7 @@ static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
497 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) 497 if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
498 return -EOPNOTSUPP; 498 return -EOPNOTSUPP;
499 499
500 if (!is_owner_or_cap(inode)) 500 if (!inode_owner_or_capable(inode))
501 return -EPERM; 501 return -EPERM;
502 502
503 if (value) { 503 if (value) {
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 6d80ecc7834f..7eb90403fc8a 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -56,7 +56,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
56 int ret = 0; /* if all else fails, just return false */ 56 int ret = 0; /* if all else fails, just return false */
57 struct ocfs2_super *osb; 57 struct ocfs2_super *osb;
58 58
59 if (nd->flags & LOOKUP_RCU) 59 if (nd && nd->flags & LOOKUP_RCU)
60 return -ECHILD; 60 return -ECHILD;
61 61
62 inode = dentry->d_inode; 62 inode = dentry->d_inode;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index d417b3f9b0c7..f97b6f1c61dd 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -354,7 +354,7 @@ static inline int ocfs2_match(int len,
354/* 354/*
355 * Returns 0 if not found, -1 on failure, and 1 on success 355 * Returns 0 if not found, -1 on failure, and 1 on success
356 */ 356 */
357static int inline ocfs2_search_dirblock(struct buffer_head *bh, 357static inline int ocfs2_search_dirblock(struct buffer_head *bh,
358 struct inode *dir, 358 struct inode *dir,
359 const char *name, int namelen, 359 const char *name, int namelen,
360 unsigned long offset, 360 unsigned long offset,
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index dcebf0d920fa..c8a044efbb15 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o 3obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
4 4
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index df69b4856d0d..f14be89a6701 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -1,4 +1,4 @@
1EXTRA_CFLAGS += -Ifs/ocfs2 1ccflags-y := -Ifs/ocfs2
2 2
3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o 3obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
4 4
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 5dbc3062b4fd..254652a9b542 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
197 dentry->d_name.len, dentry->d_name.name, 197 dentry->d_name.len, dentry->d_name.name,
198 fh, len, connectable); 198 fh, len, connectable);
199 199
200 if (len < 3 || (connectable && len < 6)) { 200 if (connectable && (len < 6)) {
201 mlog(ML_ERROR, "fh buffer is too small for encoding\n"); 201 *max_len = 6;
202 type = 255;
203 goto bail;
204 } else if (len < 3) {
205 *max_len = 3;
202 type = 255; 206 type = 255;
203 goto bail; 207 goto bail;
204 } 208 }
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7a4868196152..09de77ce002a 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -82,7 +82,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
82 } 82 }
83 83
84 status = -EACCES; 84 status = -EACCES;
85 if (!is_owner_or_cap(inode)) 85 if (!inode_owner_or_capable(inode))
86 goto bail_unlock; 86 goto bail_unlock;
87 87
88 if (!S_ISDIR(inode->i_mode)) 88 if (!S_ISDIR(inode->i_mode))
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 849fb4a2e814..d6c25d76b537 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -293,7 +293,7 @@ static int ocfs2_mknod(struct inode *dir,
293 } 293 }
294 294
295 /* get security xattr */ 295 /* get security xattr */
296 status = ocfs2_init_security_get(inode, dir, &si); 296 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
297 if (status) { 297 if (status) {
298 if (status == -EOPNOTSUPP) 298 if (status == -EOPNOTSUPP)
299 si.enable = 0; 299 si.enable = 0;
@@ -1665,7 +1665,7 @@ static int ocfs2_symlink(struct inode *dir,
1665 } 1665 }
1666 1666
1667 /* get security xattr */ 1667 /* get security xattr */
1668 status = ocfs2_init_security_get(inode, dir, &si); 1668 status = ocfs2_init_security_get(inode, dir, &dentry->d_name, &si);
1669 if (status) { 1669 if (status) {
1670 if (status == -EOPNOTSUPP) 1670 if (status == -EOPNOTSUPP)
1671 si.enable = 0; 1671 si.enable = 0;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 51cd6898e7f1..1a97ba1ec3fc 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -831,18 +831,18 @@ static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
831 831
832static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) 832static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
833{ 833{
834 ext2_set_bit(bit, bitmap); 834 __test_and_set_bit_le(bit, bitmap);
835} 835}
836#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr)) 836#define ocfs2_set_bit(bit, addr) _ocfs2_set_bit((bit), (unsigned long *)(addr))
837 837
838static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap) 838static inline void _ocfs2_clear_bit(unsigned int bit, unsigned long *bitmap)
839{ 839{
840 ext2_clear_bit(bit, bitmap); 840 __test_and_clear_bit_le(bit, bitmap);
841} 841}
842#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr)) 842#define ocfs2_clear_bit(bit, addr) _ocfs2_clear_bit((bit), (unsigned long *)(addr))
843 843
844#define ocfs2_test_bit ext2_test_bit 844#define ocfs2_test_bit test_bit_le
845#define ocfs2_find_next_zero_bit ext2_find_next_zero_bit 845#define ocfs2_find_next_zero_bit find_next_zero_bit_le
846#define ocfs2_find_next_bit ext2_find_next_bit 846#define ocfs2_find_next_bit find_next_bit_le
847#endif /* OCFS2_H */ 847#endif /* OCFS2_H */
848 848
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 196fcb52d95d..d5ab56cbe5c5 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot);
114extern const struct dquot_operations ocfs2_quota_operations; 114extern const struct dquot_operations ocfs2_quota_operations;
115extern struct quota_format_type ocfs2_quota_format; 115extern struct quota_format_type ocfs2_quota_format;
116 116
117int ocfs2_quota_setup(void);
118void ocfs2_quota_shutdown(void);
119
120#endif /* _OCFS2_QUOTA_H */ 117#endif /* _OCFS2_QUOTA_H */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 4607923eb24c..a73f64166481 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -63,8 +63,6 @@
63 * write to gf 63 * write to gf
64 */ 64 */
65 65
66static struct workqueue_struct *ocfs2_quota_wq = NULL;
67
68static void qsync_work_fn(struct work_struct *work); 66static void qsync_work_fn(struct work_struct *work);
69 67
70static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) 68static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp)
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
400 OCFS2_QBLK_RESERVED_SPACE; 398 OCFS2_QBLK_RESERVED_SPACE;
401 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 399 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
402 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 400 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
403 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 401 schedule_delayed_work(&oinfo->dqi_sync_work,
404 msecs_to_jiffies(oinfo->dqi_syncms)); 402 msecs_to_jiffies(oinfo->dqi_syncms));
405 403
406out_err: 404out_err:
407 mlog_exit(status); 405 mlog_exit(status);
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work)
635 struct super_block *sb = oinfo->dqi_gqinode->i_sb; 633 struct super_block *sb = oinfo->dqi_gqinode->i_sb;
636 634
637 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 635 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
638 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 636 schedule_delayed_work(&oinfo->dqi_sync_work,
639 msecs_to_jiffies(oinfo->dqi_syncms)); 637 msecs_to_jiffies(oinfo->dqi_syncms));
640} 638}
641 639
642/* 640/*
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = {
923 .alloc_dquot = ocfs2_alloc_dquot, 921 .alloc_dquot = ocfs2_alloc_dquot,
924 .destroy_dquot = ocfs2_destroy_dquot, 922 .destroy_dquot = ocfs2_destroy_dquot,
925}; 923};
926
927int ocfs2_quota_setup(void)
928{
929 ocfs2_quota_wq = create_workqueue("o2quot");
930 if (!ocfs2_quota_wq)
931 return -ENOMEM;
932 return 0;
933}
934
935void ocfs2_quota_shutdown(void)
936{
937 if (ocfs2_quota_wq) {
938 flush_workqueue(ocfs2_quota_wq);
939 destroy_workqueue(ocfs2_quota_wq);
940 ocfs2_quota_wq = NULL;
941 }
942}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 19ebc5aad391..c384d634872a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4328,7 +4328,8 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4328 4328
4329 /* If the security isn't preserved, we need to re-initialize them. */ 4329 /* If the security isn't preserved, we need to re-initialize them. */
4330 if (!preserve) { 4330 if (!preserve) {
4331 error = ocfs2_init_security_and_acl(dir, new_orphan_inode); 4331 error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
4332 &new_dentry->d_name);
4332 if (error) 4333 if (error)
4333 mlog_errno(error); 4334 mlog_errno(error);
4334 } 4335 }
@@ -4379,7 +4380,7 @@ static int ocfs2_user_path_parent(const char __user *path,
4379 if (IS_ERR(s)) 4380 if (IS_ERR(s))
4380 return PTR_ERR(s); 4381 return PTR_ERR(s);
4381 4382
4382 error = path_lookup(s, LOOKUP_PARENT, nd); 4383 error = kern_path_parent(s, nd);
4383 if (error) 4384 if (error)
4384 putname(s); 4385 putname(s);
4385 else 4386 else
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 36c423fb0635..236ed1bdca2c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1657,16 +1657,11 @@ static int __init ocfs2_init(void)
1657 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); 1657 mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
1658 } 1658 }
1659 1659
1660 status = ocfs2_quota_setup();
1661 if (status)
1662 goto leave;
1663
1664 ocfs2_set_locking_protocol(); 1660 ocfs2_set_locking_protocol();
1665 1661
1666 status = register_quota_format(&ocfs2_quota_format); 1662 status = register_quota_format(&ocfs2_quota_format);
1667leave: 1663leave:
1668 if (status < 0) { 1664 if (status < 0) {
1669 ocfs2_quota_shutdown();
1670 ocfs2_free_mem_caches(); 1665 ocfs2_free_mem_caches();
1671 exit_ocfs2_uptodate_cache(); 1666 exit_ocfs2_uptodate_cache();
1672 } 1667 }
@@ -1683,8 +1678,6 @@ static void __exit ocfs2_exit(void)
1683{ 1678{
1684 mlog_entry_void(); 1679 mlog_entry_void();
1685 1680
1686 ocfs2_quota_shutdown();
1687
1688 if (ocfs2_wq) { 1681 if (ocfs2_wq) {
1689 flush_workqueue(ocfs2_wq); 1682 flush_workqueue(ocfs2_wq);
1690 destroy_workqueue(ocfs2_wq); 1683 destroy_workqueue(ocfs2_wq);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 67cd43914641..6bb602486c6b 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7185,7 +7185,8 @@ out:
7185 * must not hold any lock expect i_mutex. 7185 * must not hold any lock expect i_mutex.
7186 */ 7186 */
7187int ocfs2_init_security_and_acl(struct inode *dir, 7187int ocfs2_init_security_and_acl(struct inode *dir,
7188 struct inode *inode) 7188 struct inode *inode,
7189 const struct qstr *qstr)
7189{ 7190{
7190 int ret = 0; 7191 int ret = 0;
7191 struct buffer_head *dir_bh = NULL; 7192 struct buffer_head *dir_bh = NULL;
@@ -7193,7 +7194,7 @@ int ocfs2_init_security_and_acl(struct inode *dir,
7193 .enable = 1, 7194 .enable = 1,
7194 }; 7195 };
7195 7196
7196 ret = ocfs2_init_security_get(inode, dir, &si); 7197 ret = ocfs2_init_security_get(inode, dir, qstr, &si);
7197 if (!ret) { 7198 if (!ret) {
7198 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, 7199 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7199 si.name, si.value, si.value_len, 7200 si.name, si.value, si.value_len,
@@ -7261,13 +7262,14 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7261 7262
7262int ocfs2_init_security_get(struct inode *inode, 7263int ocfs2_init_security_get(struct inode *inode,
7263 struct inode *dir, 7264 struct inode *dir,
7265 const struct qstr *qstr,
7264 struct ocfs2_security_xattr_info *si) 7266 struct ocfs2_security_xattr_info *si)
7265{ 7267{
7266 /* check whether ocfs2 support feature xattr */ 7268 /* check whether ocfs2 support feature xattr */
7267 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb))) 7269 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7268 return -EOPNOTSUPP; 7270 return -EOPNOTSUPP;
7269 return security_inode_init_security(inode, dir, &si->name, &si->value, 7271 return security_inode_init_security(inode, dir, qstr, &si->name,
7270 &si->value_len); 7272 &si->value, &si->value_len);
7271} 7273}
7272 7274
7273int ocfs2_init_security_set(handle_t *handle, 7275int ocfs2_init_security_set(handle_t *handle,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index aa64bb37a65b..d63cfb72316b 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -57,6 +57,7 @@ int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
57 struct ocfs2_dinode *di); 57 struct ocfs2_dinode *di);
58int ocfs2_xattr_remove(struct inode *, struct buffer_head *); 58int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
59int ocfs2_init_security_get(struct inode *, struct inode *, 59int ocfs2_init_security_get(struct inode *, struct inode *,
60 const struct qstr *,
60 struct ocfs2_security_xattr_info *); 61 struct ocfs2_security_xattr_info *);
61int ocfs2_init_security_set(handle_t *, struct inode *, 62int ocfs2_init_security_set(handle_t *, struct inode *,
62 struct buffer_head *, 63 struct buffer_head *,
@@ -94,5 +95,6 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
94 struct buffer_head *new_bh, 95 struct buffer_head *new_bh,
95 bool preserve_security); 96 bool preserve_security);
96int ocfs2_init_security_and_acl(struct inode *dir, 97int ocfs2_init_security_and_acl(struct inode *dir,
97 struct inode *inode); 98 struct inode *inode,
99 const struct qstr *qstr);
98#endif /* OCFS2_XATTR_H */ 100#endif /* OCFS2_XATTR_H */
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 393f3f659da7..de4ff29f1e05 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -235,33 +235,22 @@ static int omfs_dir_is_empty(struct inode *inode)
235 return *ptr != ~0; 235 return *ptr != ~0;
236} 236}
237 237
238static int omfs_unlink(struct inode *dir, struct dentry *dentry) 238static int omfs_remove(struct inode *dir, struct dentry *dentry)
239{ 239{
240 int ret;
241 struct inode *inode = dentry->d_inode; 240 struct inode *inode = dentry->d_inode;
241 int ret;
242
243 if (S_ISDIR(inode->i_mode) && !omfs_dir_is_empty(inode))
244 return -ENOTEMPTY;
242 245
243 ret = omfs_delete_entry(dentry); 246 ret = omfs_delete_entry(dentry);
244 if (ret) 247 if (ret)
245 goto end_unlink; 248 return ret;
246 249
247 inode_dec_link_count(inode); 250 clear_nlink(inode);
251 mark_inode_dirty(inode);
248 mark_inode_dirty(dir); 252 mark_inode_dirty(dir);
249 253 return 0;
250end_unlink:
251 return ret;
252}
253
254static int omfs_rmdir(struct inode *dir, struct dentry *dentry)
255{
256 int err = -ENOTEMPTY;
257 struct inode *inode = dentry->d_inode;
258
259 if (omfs_dir_is_empty(inode)) {
260 err = omfs_unlink(dir, dentry);
261 if (!err)
262 inode_dec_link_count(inode);
263 }
264 return err;
265} 254}
266 255
267static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode) 256static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)
@@ -372,9 +361,10 @@ static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
372 361
373 res = filldir(dirent, oi->i_name, strnlen(oi->i_name, 362 res = filldir(dirent, oi->i_name, strnlen(oi->i_name,
374 OMFS_NAMELEN), filp->f_pos, self, d_type); 363 OMFS_NAMELEN), filp->f_pos, self, d_type);
375 if (res == 0)
376 filp->f_pos++;
377 brelse(bh); 364 brelse(bh);
365 if (res < 0)
366 break;
367 filp->f_pos++;
378 } 368 }
379out: 369out:
380 return res; 370 return res;
@@ -385,44 +375,28 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
385{ 375{
386 struct inode *new_inode = new_dentry->d_inode; 376 struct inode *new_inode = new_dentry->d_inode;
387 struct inode *old_inode = old_dentry->d_inode; 377 struct inode *old_inode = old_dentry->d_inode;
388 struct buffer_head *bh;
389 int is_dir;
390 int err; 378 int err;
391 379
392 is_dir = S_ISDIR(old_inode->i_mode);
393
394 if (new_inode) { 380 if (new_inode) {
395 /* overwriting existing file/dir */ 381 /* overwriting existing file/dir */
396 err = -ENOTEMPTY; 382 err = omfs_remove(new_dir, new_dentry);
397 if (is_dir && !omfs_dir_is_empty(new_inode))
398 goto out;
399
400 err = -ENOENT;
401 bh = omfs_find_entry(new_dir, new_dentry->d_name.name,
402 new_dentry->d_name.len);
403 if (IS_ERR(bh))
404 goto out;
405 brelse(bh);
406
407 err = omfs_unlink(new_dir, new_dentry);
408 if (err) 383 if (err)
409 goto out; 384 goto out;
410 } 385 }
411 386
412 /* since omfs locates files by name, we need to unlink _before_ 387 /* since omfs locates files by name, we need to unlink _before_
413 * adding the new link or we won't find the old one */ 388 * adding the new link or we won't find the old one */
414 inode_inc_link_count(old_inode); 389 err = omfs_delete_entry(old_dentry);
415 err = omfs_unlink(old_dir, old_dentry); 390 if (err)
416 if (err) {
417 inode_dec_link_count(old_inode);
418 goto out; 391 goto out;
419 }
420 392
393 mark_inode_dirty(old_dir);
421 err = omfs_add_link(new_dentry, old_inode); 394 err = omfs_add_link(new_dentry, old_inode);
422 if (err) 395 if (err)
423 goto out; 396 goto out;
424 397
425 old_inode->i_ctime = CURRENT_TIME_SEC; 398 old_inode->i_ctime = CURRENT_TIME_SEC;
399 mark_inode_dirty(old_inode);
426out: 400out:
427 return err; 401 return err;
428} 402}
@@ -488,8 +462,8 @@ const struct inode_operations omfs_dir_inops = {
488 .mkdir = omfs_mkdir, 462 .mkdir = omfs_mkdir,
489 .rename = omfs_rename, 463 .rename = omfs_rename,
490 .create = omfs_create, 464 .create = omfs_create,
491 .unlink = omfs_unlink, 465 .unlink = omfs_remove,
492 .rmdir = omfs_rmdir, 466 .rmdir = omfs_remove,
493}; 467};
494 468
495const struct file_operations omfs_dir_operations = { 469const struct file_operations omfs_dir_operations = {
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b5..b52cf013ffa1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -233,6 +233,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
233 233
234 if (!(file->f_mode & FMODE_WRITE)) 234 if (!(file->f_mode & FMODE_WRITE))
235 return -EBADF; 235 return -EBADF;
236
237 /* It's not possible punch hole on append only file */
238 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
239 return -EPERM;
240
241 if (IS_IMMUTABLE(inode))
242 return -EPERM;
243
236 /* 244 /*
237 * Revalidate the write permissions, in case security policy has 245 * Revalidate the write permissions, in case security policy has
238 * changed since the files were opened. 246 * changed since the files were opened.
@@ -565,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
565{ 573{
566 struct path path; 574 struct path path;
567 int error = -EINVAL; 575 int error = -EINVAL;
568 int follow; 576 int lookup_flags;
569 577
570 if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) 578 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
571 goto out; 579 goto out;
572 580
573 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 581 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
574 error = user_path_at(dfd, filename, follow, &path); 582 if (flag & AT_EMPTY_PATH)
583 lookup_flags |= LOOKUP_EMPTY;
584 error = user_path_at(dfd, filename, lookup_flags, &path);
575 if (error) 585 if (error)
576 goto out; 586 goto out;
577 error = mnt_want_write(path.mnt); 587 error = mnt_want_write(path.mnt);
@@ -661,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
661 int (*open)(struct inode *, struct file *), 671 int (*open)(struct inode *, struct file *),
662 const struct cred *cred) 672 const struct cred *cred)
663{ 673{
674 static const struct file_operations empty_fops = {};
664 struct inode *inode; 675 struct inode *inode;
665 int error; 676 int error;
666 677
667 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | 678 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
668 FMODE_PREAD | FMODE_PWRITE; 679 FMODE_PREAD | FMODE_PWRITE;
680
681 if (unlikely(f->f_flags & O_PATH))
682 f->f_mode = FMODE_PATH;
683
669 inode = dentry->d_inode; 684 inode = dentry->d_inode;
670 if (f->f_mode & FMODE_WRITE) { 685 if (f->f_mode & FMODE_WRITE) {
671 error = __get_file_write_access(inode, mnt); 686 error = __get_file_write_access(inode, mnt);
@@ -679,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
679 f->f_path.dentry = dentry; 694 f->f_path.dentry = dentry;
680 f->f_path.mnt = mnt; 695 f->f_path.mnt = mnt;
681 f->f_pos = 0; 696 f->f_pos = 0;
682 f->f_op = fops_get(inode->i_fop);
683 file_sb_list_add(f, inode->i_sb); 697 file_sb_list_add(f, inode->i_sb);
684 698
699 if (unlikely(f->f_mode & FMODE_PATH)) {
700 f->f_op = &empty_fops;
701 return f;
702 }
703
704 f->f_op = fops_get(inode->i_fop);
705
685 error = security_dentry_open(f, cred); 706 error = security_dentry_open(f, cred);
686 if (error) 707 if (error)
687 goto cleanup_all; 708 goto cleanup_all;
@@ -693,7 +714,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
693 if (error) 714 if (error)
694 goto cleanup_all; 715 goto cleanup_all;
695 } 716 }
696 ima_counts_get(f); 717 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
718 i_readcount_inc(inode);
697 719
698 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); 720 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
699 721
@@ -813,17 +835,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
813 835
814 validate_creds(cred); 836 validate_creds(cred);
815 837
816 /* 838 /* We must always pass in a valid mount pointer. */
817 * We must always pass in a valid mount pointer. Historically 839 BUG_ON(!mnt);
818 * callers got away with not passing it, but we must enforce this at
819 * the earliest possible point now to avoid strange problems deep in the
820 * filesystem stack.
821 */
822 if (!mnt) {
823 printk(KERN_WARNING "%s called with NULL vfsmount\n", __func__);
824 dump_stack();
825 return ERR_PTR(-EINVAL);
826 }
827 840
828 error = -ENFILE; 841 error = -ENFILE;
829 f = get_empty_filp(); 842 f = get_empty_filp();
@@ -882,15 +895,110 @@ void fd_install(unsigned int fd, struct file *file)
882 895
883EXPORT_SYMBOL(fd_install); 896EXPORT_SYMBOL(fd_install);
884 897
898static inline int build_open_flags(int flags, int mode, struct open_flags *op)
899{
900 int lookup_flags = 0;
901 int acc_mode;
902
903 if (!(flags & O_CREAT))
904 mode = 0;
905 op->mode = mode;
906
907 /* Must never be set by userspace */
908 flags &= ~FMODE_NONOTIFY;
909
910 /*
911 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
912 * check for O_DSYNC if the need any syncing at all we enforce it's
913 * always set instead of having to deal with possibly weird behaviour
914 * for malicious applications setting only __O_SYNC.
915 */
916 if (flags & __O_SYNC)
917 flags |= O_DSYNC;
918
919 /*
920 * If we have O_PATH in the open flag. Then we
921 * cannot have anything other than the below set of flags
922 */
923 if (flags & O_PATH) {
924 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
925 acc_mode = 0;
926 } else {
927 acc_mode = MAY_OPEN | ACC_MODE(flags);
928 }
929
930 op->open_flag = flags;
931
932 /* O_TRUNC implies we need access checks for write permissions */
933 if (flags & O_TRUNC)
934 acc_mode |= MAY_WRITE;
935
936 /* Allow the LSM permission hook to distinguish append
937 access from general write access. */
938 if (flags & O_APPEND)
939 acc_mode |= MAY_APPEND;
940
941 op->acc_mode = acc_mode;
942
943 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
944
945 if (flags & O_CREAT) {
946 op->intent |= LOOKUP_CREATE;
947 if (flags & O_EXCL)
948 op->intent |= LOOKUP_EXCL;
949 }
950
951 if (flags & O_DIRECTORY)
952 lookup_flags |= LOOKUP_DIRECTORY;
953 if (!(flags & O_NOFOLLOW))
954 lookup_flags |= LOOKUP_FOLLOW;
955 return lookup_flags;
956}
957
958/**
959 * filp_open - open file and return file pointer
960 *
961 * @filename: path to open
962 * @flags: open flags as per the open(2) second argument
963 * @mode: mode for the new file if O_CREAT is set, else ignored
964 *
965 * This is the helper to open a file from kernelspace if you really
966 * have to. But in generally you should not do this, so please move
967 * along, nothing to see here..
968 */
969struct file *filp_open(const char *filename, int flags, int mode)
970{
971 struct open_flags op;
972 int lookup = build_open_flags(flags, mode, &op);
973 return do_filp_open(AT_FDCWD, filename, &op, lookup);
974}
975EXPORT_SYMBOL(filp_open);
976
977struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
978 const char *filename, int flags)
979{
980 struct open_flags op;
981 int lookup = build_open_flags(flags, 0, &op);
982 if (flags & O_CREAT)
983 return ERR_PTR(-EINVAL);
984 if (!filename && (flags & O_DIRECTORY))
985 if (!dentry->d_inode->i_op->lookup)
986 return ERR_PTR(-ENOTDIR);
987 return do_file_open_root(dentry, mnt, filename, &op, lookup);
988}
989EXPORT_SYMBOL(file_open_root);
990
885long do_sys_open(int dfd, const char __user *filename, int flags, int mode) 991long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
886{ 992{
993 struct open_flags op;
994 int lookup = build_open_flags(flags, mode, &op);
887 char *tmp = getname(filename); 995 char *tmp = getname(filename);
888 int fd = PTR_ERR(tmp); 996 int fd = PTR_ERR(tmp);
889 997
890 if (!IS_ERR(tmp)) { 998 if (!IS_ERR(tmp)) {
891 fd = get_unused_fd_flags(flags); 999 fd = get_unused_fd_flags(flags);
892 if (fd >= 0) { 1000 if (fd >= 0) {
893 struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); 1001 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
894 if (IS_ERR(f)) { 1002 if (IS_ERR(f)) {
895 put_unused_fd(fd); 1003 put_unused_fd(fd);
896 fd = PTR_ERR(f); 1004 fd = PTR_ERR(f);
@@ -960,8 +1068,10 @@ int filp_close(struct file *filp, fl_owner_t id)
960 if (filp->f_op && filp->f_op->flush) 1068 if (filp->f_op && filp->f_op->flush)
961 retval = filp->f_op->flush(filp, id); 1069 retval = filp->f_op->flush(filp, id);
962 1070
963 dnotify_flush(filp, id); 1071 if (likely(!(filp->f_mode & FMODE_PATH))) {
964 locks_remove_posix(filp, id); 1072 dnotify_flush(filp, id);
1073 locks_remove_posix(filp, id);
1074 }
965 fput(filp); 1075 fput(filp);
966 return retval; 1076 return retval;
967} 1077}
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c
index 48cec7cbca17..764b86a01965 100644
--- a/fs/partitions/osf.c
+++ b/fs/partitions/osf.c
@@ -10,10 +10,13 @@
10#include "check.h" 10#include "check.h"
11#include "osf.h" 11#include "osf.h"
12 12
13#define MAX_OSF_PARTITIONS 18
14
13int osf_partition(struct parsed_partitions *state) 15int osf_partition(struct parsed_partitions *state)
14{ 16{
15 int i; 17 int i;
16 int slot = 1; 18 int slot = 1;
19 unsigned int npartitions;
17 Sector sect; 20 Sector sect;
18 unsigned char *data; 21 unsigned char *data;
19 struct disklabel { 22 struct disklabel {
@@ -45,7 +48,7 @@ int osf_partition(struct parsed_partitions *state)
45 u8 p_fstype; 48 u8 p_fstype;
46 u8 p_frag; 49 u8 p_frag;
47 __le16 p_cpg; 50 __le16 p_cpg;
48 } d_partitions[8]; 51 } d_partitions[MAX_OSF_PARTITIONS];
49 } * label; 52 } * label;
50 struct d_partition * partition; 53 struct d_partition * partition;
51 54
@@ -63,7 +66,12 @@ int osf_partition(struct parsed_partitions *state)
63 put_dev_sector(sect); 66 put_dev_sector(sect);
64 return 0; 67 return 0;
65 } 68 }
66 for (i = 0 ; i < le16_to_cpu(label->d_npartitions); i++, partition++) { 69 npartitions = le16_to_cpu(label->d_npartitions);
70 if (npartitions > MAX_OSF_PARTITIONS) {
71 put_dev_sector(sect);
72 return 0;
73 }
74 for (i = 0 ; i < npartitions; i++, partition++) {
67 if (slot == state->limit) 75 if (slot == state->limit)
68 break; 76 break;
69 if (le32_to_cpu(partition->p_size)) 77 if (le32_to_cpu(partition->p_size))
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 7c99c1cf7e5c..5e4f776b0917 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -489,8 +489,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
489 vsize, 489 vsize,
490 mm ? get_mm_rss(mm) : 0, 490 mm ? get_mm_rss(mm) : 0,
491 rsslim, 491 rsslim,
492 mm ? mm->start_code : 0, 492 mm ? (permitted ? mm->start_code : 1) : 0,
493 mm ? mm->end_code : 0, 493 mm ? (permitted ? mm->end_code : 1) : 0,
494 (permitted && mm) ? mm->start_stack : 0, 494 (permitted && mm) ? mm->start_stack : 0,
495 esp, 495 esp,
496 eip, 496 eip,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9d096e82b201..5a670c11aeac 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -191,17 +191,20 @@ static int proc_root_link(struct inode *inode, struct path *path)
191 return result; 191 return result;
192} 192}
193 193
194/* 194static struct mm_struct *__check_mem_permission(struct task_struct *task)
195 * Return zero if current may access user memory in @task, -error if not.
196 */
197static int check_mem_permission(struct task_struct *task)
198{ 195{
196 struct mm_struct *mm;
197
198 mm = get_task_mm(task);
199 if (!mm)
200 return ERR_PTR(-EINVAL);
201
199 /* 202 /*
200 * A task can always look at itself, in case it chooses 203 * A task can always look at itself, in case it chooses
201 * to use system calls instead of load instructions. 204 * to use system calls instead of load instructions.
202 */ 205 */
203 if (task == current) 206 if (task == current)
204 return 0; 207 return mm;
205 208
206 /* 209 /*
207 * If current is actively ptrace'ing, and would also be 210 * If current is actively ptrace'ing, and would also be
@@ -213,27 +216,53 @@ static int check_mem_permission(struct task_struct *task)
213 match = (tracehook_tracer_task(task) == current); 216 match = (tracehook_tracer_task(task) == current);
214 rcu_read_unlock(); 217 rcu_read_unlock();
215 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 218 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
216 return 0; 219 return mm;
217 } 220 }
218 221
219 /* 222 /*
220 * Noone else is allowed. 223 * Noone else is allowed.
221 */ 224 */
222 return -EPERM; 225 mmput(mm);
226 return ERR_PTR(-EPERM);
227}
228
229/*
230 * If current may access user memory in @task return a reference to the
231 * corresponding mm, otherwise ERR_PTR.
232 */
233static struct mm_struct *check_mem_permission(struct task_struct *task)
234{
235 struct mm_struct *mm;
236 int err;
237
238 /*
239 * Avoid racing if task exec's as we might get a new mm but validate
240 * against old credentials.
241 */
242 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
243 if (err)
244 return ERR_PTR(err);
245
246 mm = __check_mem_permission(task);
247 mutex_unlock(&task->signal->cred_guard_mutex);
248
249 return mm;
223} 250}
224 251
225struct mm_struct *mm_for_maps(struct task_struct *task) 252struct mm_struct *mm_for_maps(struct task_struct *task)
226{ 253{
227 struct mm_struct *mm; 254 struct mm_struct *mm;
255 int err;
228 256
229 if (mutex_lock_killable(&task->signal->cred_guard_mutex)) 257 err = mutex_lock_killable(&task->signal->cred_guard_mutex);
230 return NULL; 258 if (err)
259 return ERR_PTR(err);
231 260
232 mm = get_task_mm(task); 261 mm = get_task_mm(task);
233 if (mm && mm != current->mm && 262 if (mm && mm != current->mm &&
234 !ptrace_may_access(task, PTRACE_MODE_READ)) { 263 !ptrace_may_access(task, PTRACE_MODE_READ)) {
235 mmput(mm); 264 mmput(mm);
236 mm = NULL; 265 mm = ERR_PTR(-EACCES);
237 } 266 }
238 mutex_unlock(&task->signal->cred_guard_mutex); 267 mutex_unlock(&task->signal->cred_guard_mutex);
239 268
@@ -279,9 +308,9 @@ out:
279 308
280static int proc_pid_auxv(struct task_struct *task, char *buffer) 309static int proc_pid_auxv(struct task_struct *task, char *buffer)
281{ 310{
282 int res = 0; 311 struct mm_struct *mm = mm_for_maps(task);
283 struct mm_struct *mm = get_task_mm(task); 312 int res = PTR_ERR(mm);
284 if (mm) { 313 if (mm && !IS_ERR(mm)) {
285 unsigned int nwords = 0; 314 unsigned int nwords = 0;
286 do { 315 do {
287 nwords += 2; 316 nwords += 2;
@@ -318,6 +347,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer)
318} 347}
319#endif /* CONFIG_KALLSYMS */ 348#endif /* CONFIG_KALLSYMS */
320 349
350static int lock_trace(struct task_struct *task)
351{
352 int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
353 if (err)
354 return err;
355 if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
356 mutex_unlock(&task->signal->cred_guard_mutex);
357 return -EPERM;
358 }
359 return 0;
360}
361
362static void unlock_trace(struct task_struct *task)
363{
364 mutex_unlock(&task->signal->cred_guard_mutex);
365}
366
321#ifdef CONFIG_STACKTRACE 367#ifdef CONFIG_STACKTRACE
322 368
323#define MAX_STACK_TRACE_DEPTH 64 369#define MAX_STACK_TRACE_DEPTH 64
@@ -327,6 +373,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
327{ 373{
328 struct stack_trace trace; 374 struct stack_trace trace;
329 unsigned long *entries; 375 unsigned long *entries;
376 int err;
330 int i; 377 int i;
331 378
332 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); 379 entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
@@ -337,15 +384,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
337 trace.max_entries = MAX_STACK_TRACE_DEPTH; 384 trace.max_entries = MAX_STACK_TRACE_DEPTH;
338 trace.entries = entries; 385 trace.entries = entries;
339 trace.skip = 0; 386 trace.skip = 0;
340 save_stack_trace_tsk(task, &trace);
341 387
342 for (i = 0; i < trace.nr_entries; i++) { 388 err = lock_trace(task);
343 seq_printf(m, "[<%p>] %pS\n", 389 if (!err) {
344 (void *)entries[i], (void *)entries[i]); 390 save_stack_trace_tsk(task, &trace);
391
392 for (i = 0; i < trace.nr_entries; i++) {
393 seq_printf(m, "[<%pK>] %pS\n",
394 (void *)entries[i], (void *)entries[i]);
395 }
396 unlock_trace(task);
345 } 397 }
346 kfree(entries); 398 kfree(entries);
347 399
348 return 0; 400 return err;
349} 401}
350#endif 402#endif
351 403
@@ -508,18 +560,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
508{ 560{
509 long nr; 561 long nr;
510 unsigned long args[6], sp, pc; 562 unsigned long args[6], sp, pc;
563 int res = lock_trace(task);
564 if (res)
565 return res;
511 566
512 if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) 567 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
513 return sprintf(buffer, "running\n"); 568 res = sprintf(buffer, "running\n");
514 569 else if (nr < 0)
515 if (nr < 0) 570 res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
516 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); 571 else
517 572 res = sprintf(buffer,
518 return sprintf(buffer,
519 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", 573 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
520 nr, 574 nr,
521 args[0], args[1], args[2], args[3], args[4], args[5], 575 args[0], args[1], args[2], args[3], args[4], args[5],
522 sp, pc); 576 sp, pc);
577 unlock_trace(task);
578 return res;
523} 579}
524#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ 580#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
525 581
@@ -775,18 +831,14 @@ static ssize_t mem_read(struct file * file, char __user * buf,
775 if (!task) 831 if (!task)
776 goto out_no_task; 832 goto out_no_task;
777 833
778 if (check_mem_permission(task))
779 goto out;
780
781 ret = -ENOMEM; 834 ret = -ENOMEM;
782 page = (char *)__get_free_page(GFP_TEMPORARY); 835 page = (char *)__get_free_page(GFP_TEMPORARY);
783 if (!page) 836 if (!page)
784 goto out; 837 goto out;
785 838
786 ret = 0; 839 mm = check_mem_permission(task);
787 840 ret = PTR_ERR(mm);
788 mm = get_task_mm(task); 841 if (IS_ERR(mm))
789 if (!mm)
790 goto out_free; 842 goto out_free;
791 843
792 ret = -EIO; 844 ret = -EIO;
@@ -800,8 +852,8 @@ static ssize_t mem_read(struct file * file, char __user * buf,
800 int this_len, retval; 852 int this_len, retval;
801 853
802 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; 854 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
803 retval = access_process_vm(task, src, page, this_len, 0); 855 retval = access_remote_vm(mm, src, page, this_len, 0);
804 if (!retval || check_mem_permission(task)) { 856 if (!retval) {
805 if (!ret) 857 if (!ret)
806 ret = -EIO; 858 ret = -EIO;
807 break; 859 break;
@@ -829,10 +881,6 @@ out_no_task:
829 return ret; 881 return ret;
830} 882}
831 883
832#define mem_write NULL
833
834#ifndef mem_write
835/* This is a security hazard */
836static ssize_t mem_write(struct file * file, const char __user *buf, 884static ssize_t mem_write(struct file * file, const char __user *buf,
837 size_t count, loff_t *ppos) 885 size_t count, loff_t *ppos)
838{ 886{
@@ -840,18 +888,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
840 char *page; 888 char *page;
841 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); 889 struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
842 unsigned long dst = *ppos; 890 unsigned long dst = *ppos;
891 struct mm_struct *mm;
843 892
844 copied = -ESRCH; 893 copied = -ESRCH;
845 if (!task) 894 if (!task)
846 goto out_no_task; 895 goto out_no_task;
847 896
848 if (check_mem_permission(task)) 897 mm = check_mem_permission(task);
849 goto out; 898 copied = PTR_ERR(mm);
899 if (IS_ERR(mm))
900 goto out_task;
901
902 copied = -EIO;
903 if (file->private_data != (void *)((long)current->self_exec_id))
904 goto out_mm;
850 905
851 copied = -ENOMEM; 906 copied = -ENOMEM;
852 page = (char *)__get_free_page(GFP_TEMPORARY); 907 page = (char *)__get_free_page(GFP_TEMPORARY);
853 if (!page) 908 if (!page)
854 goto out; 909 goto out_mm;
855 910
856 copied = 0; 911 copied = 0;
857 while (count > 0) { 912 while (count > 0) {
@@ -862,7 +917,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
862 copied = -EFAULT; 917 copied = -EFAULT;
863 break; 918 break;
864 } 919 }
865 retval = access_process_vm(task, dst, page, this_len, 1); 920 retval = access_remote_vm(mm, dst, page, this_len, 1);
866 if (!retval) { 921 if (!retval) {
867 if (!copied) 922 if (!copied)
868 copied = -EIO; 923 copied = -EIO;
@@ -875,12 +930,13 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
875 } 930 }
876 *ppos = dst; 931 *ppos = dst;
877 free_page((unsigned long) page); 932 free_page((unsigned long) page);
878out: 933out_mm:
934 mmput(mm);
935out_task:
879 put_task_struct(task); 936 put_task_struct(task);
880out_no_task: 937out_no_task:
881 return copied; 938 return copied;
882} 939}
883#endif
884 940
885loff_t mem_lseek(struct file *file, loff_t offset, int orig) 941loff_t mem_lseek(struct file *file, loff_t offset, int orig)
886{ 942{
@@ -917,20 +973,18 @@ static ssize_t environ_read(struct file *file, char __user *buf,
917 if (!task) 973 if (!task)
918 goto out_no_task; 974 goto out_no_task;
919 975
920 if (!ptrace_may_access(task, PTRACE_MODE_READ))
921 goto out;
922
923 ret = -ENOMEM; 976 ret = -ENOMEM;
924 page = (char *)__get_free_page(GFP_TEMPORARY); 977 page = (char *)__get_free_page(GFP_TEMPORARY);
925 if (!page) 978 if (!page)
926 goto out; 979 goto out;
927 980
928 ret = 0;
929 981
930 mm = get_task_mm(task); 982 mm = mm_for_maps(task);
931 if (!mm) 983 ret = PTR_ERR(mm);
984 if (!mm || IS_ERR(mm))
932 goto out_free; 985 goto out_free;
933 986
987 ret = 0;
934 while (count > 0) { 988 while (count > 0) {
935 int this_len, retval, max_len; 989 int this_len, retval, max_len;
936 990
@@ -2620,35 +2674,6 @@ static const struct pid_entry proc_base_stuff[] = {
2620 &proc_self_inode_operations, NULL, {}), 2674 &proc_self_inode_operations, NULL, {}),
2621}; 2675};
2622 2676
2623/*
2624 * Exceptional case: normally we are not allowed to unhash a busy
2625 * directory. In this case, however, we can do it - no aliasing problems
2626 * due to the way we treat inodes.
2627 */
2628static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
2629{
2630 struct inode *inode;
2631 struct task_struct *task;
2632
2633 if (nd->flags & LOOKUP_RCU)
2634 return -ECHILD;
2635
2636 inode = dentry->d_inode;
2637 task = get_proc_task(inode);
2638 if (task) {
2639 put_task_struct(task);
2640 return 1;
2641 }
2642 d_drop(dentry);
2643 return 0;
2644}
2645
2646static const struct dentry_operations proc_base_dentry_operations =
2647{
2648 .d_revalidate = proc_base_revalidate,
2649 .d_delete = pid_delete_dentry,
2650};
2651
2652static struct dentry *proc_base_instantiate(struct inode *dir, 2677static struct dentry *proc_base_instantiate(struct inode *dir,
2653 struct dentry *dentry, struct task_struct *task, const void *ptr) 2678 struct dentry *dentry, struct task_struct *task, const void *ptr)
2654{ 2679{
@@ -2685,7 +2710,6 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
2685 if (p->fop) 2710 if (p->fop)
2686 inode->i_fop = p->fop; 2711 inode->i_fop = p->fop;
2687 ei->op = p->op; 2712 ei->op = p->op;
2688 d_set_d_op(dentry, &proc_base_dentry_operations);
2689 d_add(dentry, inode); 2713 d_add(dentry, inode);
2690 error = NULL; 2714 error = NULL;
2691out: 2715out:
@@ -2778,8 +2802,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2778static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, 2802static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
2779 struct pid *pid, struct task_struct *task) 2803 struct pid *pid, struct task_struct *task)
2780{ 2804{
2781 seq_printf(m, "%08x\n", task->personality); 2805 int err = lock_trace(task);
2782 return 0; 2806 if (!err) {
2807 seq_printf(m, "%08x\n", task->personality);
2808 unlock_trace(task);
2809 }
2810 return err;
2783} 2811}
2784 2812
2785/* 2813/*
@@ -2798,7 +2826,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2798 REG("environ", S_IRUSR, proc_environ_operations), 2826 REG("environ", S_IRUSR, proc_environ_operations),
2799 INF("auxv", S_IRUSR, proc_pid_auxv), 2827 INF("auxv", S_IRUSR, proc_pid_auxv),
2800 ONE("status", S_IRUGO, proc_pid_status), 2828 ONE("status", S_IRUGO, proc_pid_status),
2801 ONE("personality", S_IRUSR, proc_pid_personality), 2829 ONE("personality", S_IRUGO, proc_pid_personality),
2802 INF("limits", S_IRUGO, proc_pid_limits), 2830 INF("limits", S_IRUGO, proc_pid_limits),
2803#ifdef CONFIG_SCHED_DEBUG 2831#ifdef CONFIG_SCHED_DEBUG
2804 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 2832 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
@@ -2808,7 +2836,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2808#endif 2836#endif
2809 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 2837 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
2810#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 2838#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2811 INF("syscall", S_IRUSR, proc_pid_syscall), 2839 INF("syscall", S_IRUGO, proc_pid_syscall),
2812#endif 2840#endif
2813 INF("cmdline", S_IRUGO, proc_pid_cmdline), 2841 INF("cmdline", S_IRUGO, proc_pid_cmdline),
2814 ONE("stat", S_IRUGO, proc_tgid_stat), 2842 ONE("stat", S_IRUGO, proc_tgid_stat),
@@ -2827,7 +2855,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2827#ifdef CONFIG_PROC_PAGE_MONITOR 2855#ifdef CONFIG_PROC_PAGE_MONITOR
2828 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 2856 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
2829 REG("smaps", S_IRUGO, proc_smaps_operations), 2857 REG("smaps", S_IRUGO, proc_smaps_operations),
2830 REG("pagemap", S_IRUSR, proc_pagemap_operations), 2858 REG("pagemap", S_IRUGO, proc_pagemap_operations),
2831#endif 2859#endif
2832#ifdef CONFIG_SECURITY 2860#ifdef CONFIG_SECURITY
2833 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 2861 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -2836,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2836 INF("wchan", S_IRUGO, proc_pid_wchan), 2864 INF("wchan", S_IRUGO, proc_pid_wchan),
2837#endif 2865#endif
2838#ifdef CONFIG_STACKTRACE 2866#ifdef CONFIG_STACKTRACE
2839 ONE("stack", S_IRUSR, proc_pid_stack), 2867 ONE("stack", S_IRUGO, proc_pid_stack),
2840#endif 2868#endif
2841#ifdef CONFIG_SCHEDSTATS 2869#ifdef CONFIG_SCHEDSTATS
2842 INF("schedstat", S_IRUGO, proc_pid_schedstat), 2870 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3138,14 +3166,14 @@ static const struct pid_entry tid_base_stuff[] = {
3138 REG("environ", S_IRUSR, proc_environ_operations), 3166 REG("environ", S_IRUSR, proc_environ_operations),
3139 INF("auxv", S_IRUSR, proc_pid_auxv), 3167 INF("auxv", S_IRUSR, proc_pid_auxv),
3140 ONE("status", S_IRUGO, proc_pid_status), 3168 ONE("status", S_IRUGO, proc_pid_status),
3141 ONE("personality", S_IRUSR, proc_pid_personality), 3169 ONE("personality", S_IRUGO, proc_pid_personality),
3142 INF("limits", S_IRUGO, proc_pid_limits), 3170 INF("limits", S_IRUGO, proc_pid_limits),
3143#ifdef CONFIG_SCHED_DEBUG 3171#ifdef CONFIG_SCHED_DEBUG
3144 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), 3172 REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
3145#endif 3173#endif
3146 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), 3174 REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations),
3147#ifdef CONFIG_HAVE_ARCH_TRACEHOOK 3175#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3148 INF("syscall", S_IRUSR, proc_pid_syscall), 3176 INF("syscall", S_IRUGO, proc_pid_syscall),
3149#endif 3177#endif
3150 INF("cmdline", S_IRUGO, proc_pid_cmdline), 3178 INF("cmdline", S_IRUGO, proc_pid_cmdline),
3151 ONE("stat", S_IRUGO, proc_tid_stat), 3179 ONE("stat", S_IRUGO, proc_tid_stat),
@@ -3163,7 +3191,7 @@ static const struct pid_entry tid_base_stuff[] = {
3163#ifdef CONFIG_PROC_PAGE_MONITOR 3191#ifdef CONFIG_PROC_PAGE_MONITOR
3164 REG("clear_refs", S_IWUSR, proc_clear_refs_operations), 3192 REG("clear_refs", S_IWUSR, proc_clear_refs_operations),
3165 REG("smaps", S_IRUGO, proc_smaps_operations), 3193 REG("smaps", S_IRUGO, proc_smaps_operations),
3166 REG("pagemap", S_IRUSR, proc_pagemap_operations), 3194 REG("pagemap", S_IRUGO, proc_pagemap_operations),
3167#endif 3195#endif
3168#ifdef CONFIG_SECURITY 3196#ifdef CONFIG_SECURITY
3169 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), 3197 DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations),
@@ -3172,7 +3200,7 @@ static const struct pid_entry tid_base_stuff[] = {
3172 INF("wchan", S_IRUGO, proc_pid_wchan), 3200 INF("wchan", S_IRUGO, proc_pid_wchan),
3173#endif 3201#endif
3174#ifdef CONFIG_STACKTRACE 3202#ifdef CONFIG_STACKTRACE
3175 ONE("stack", S_IRUSR, proc_pid_stack), 3203 ONE("stack", S_IRUGO, proc_pid_stack),
3176#endif 3204#endif
3177#ifdef CONFIG_SCHEDSTATS 3205#ifdef CONFIG_SCHEDSTATS
3178 INF("schedstat", S_IRUGO, proc_pid_schedstat), 3206 INF("schedstat", S_IRUGO, proc_pid_schedstat),
@@ -3191,7 +3219,7 @@ static const struct pid_entry tid_base_stuff[] = {
3191 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), 3219 REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations),
3192#ifdef CONFIG_AUDITSYSCALL 3220#ifdef CONFIG_AUDITSYSCALL
3193 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), 3221 REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
3194 REG("sessionid", S_IRUSR, proc_sessionid_operations), 3222 REG("sessionid", S_IRUGO, proc_sessionid_operations),
3195#endif 3223#endif
3196#ifdef CONFIG_FAULT_INJECTION 3224#ifdef CONFIG_FAULT_INJECTION
3197 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), 3225 REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 01e07f2a188f..f1281339b6fa 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -28,7 +28,7 @@
28 28
29DEFINE_SPINLOCK(proc_subdir_lock); 29DEFINE_SPINLOCK(proc_subdir_lock);
30 30
31static int proc_match(int len, const char *name, struct proc_dir_entry *de) 31static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
32{ 32{
33 if (de->namelen != len) 33 if (de->namelen != len)
34 return 0; 34 return 0;
@@ -303,7 +303,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
303{ 303{
304 const char *cp = name, *next; 304 const char *cp = name, *next;
305 struct proc_dir_entry *de; 305 struct proc_dir_entry *de;
306 int len; 306 unsigned int len;
307 307
308 de = *ret; 308 de = *ret;
309 if (!de) 309 if (!de)
@@ -602,7 +602,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
602{ 602{
603 struct proc_dir_entry *ent = NULL; 603 struct proc_dir_entry *ent = NULL;
604 const char *fn = name; 604 const char *fn = name;
605 int len; 605 unsigned int len;
606 606
607 /* make sure name is valid */ 607 /* make sure name is valid */
608 if (!name || !strlen(name)) goto out; 608 if (!name || !strlen(name)) goto out;
@@ -786,7 +786,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
786 struct proc_dir_entry **p; 786 struct proc_dir_entry **p;
787 struct proc_dir_entry *de = NULL; 787 struct proc_dir_entry *de = NULL;
788 const char *fn = name; 788 const char *fn = name;
789 int len; 789 unsigned int len;
790 790
791 spin_lock(&proc_subdir_lock); 791 spin_lock(&proc_subdir_lock);
792 if (__xlate_proc_name(name, &parent, &fn) != 0) { 792 if (__xlate_proc_name(name, &parent, &fn) != 0) {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 176ce4cda68a..d15aa1b1cc8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -27,6 +27,7 @@
27static void proc_evict_inode(struct inode *inode) 27static void proc_evict_inode(struct inode *inode)
28{ 28{
29 struct proc_dir_entry *de; 29 struct proc_dir_entry *de;
30 struct ctl_table_header *head;
30 31
31 truncate_inode_pages(&inode->i_data, 0); 32 truncate_inode_pages(&inode->i_data, 0);
32 end_writeback(inode); 33 end_writeback(inode);
@@ -38,12 +39,13 @@ static void proc_evict_inode(struct inode *inode)
38 de = PROC_I(inode)->pde; 39 de = PROC_I(inode)->pde;
39 if (de) 40 if (de)
40 pde_put(de); 41 pde_put(de);
41 if (PROC_I(inode)->sysctl) 42 head = PROC_I(inode)->sysctl;
42 sysctl_head_put(PROC_I(inode)->sysctl); 43 if (head) {
44 rcu_assign_pointer(PROC_I(inode)->sysctl, NULL);
45 sysctl_head_put(head);
46 }
43} 47}
44 48
45struct vfsmount *proc_mnt;
46
47static struct kmem_cache * proc_inode_cachep; 49static struct kmem_cache * proc_inode_cachep;
48 50
49static struct inode *proc_alloc_inode(struct super_block *sb) 51static struct inode *proc_alloc_inode(struct super_block *sb)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 9ad561ded409..c03e8d3a3a5b 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -107,7 +107,6 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
107} 107}
108void pde_put(struct proc_dir_entry *pde); 108void pde_put(struct proc_dir_entry *pde);
109 109
110extern struct vfsmount *proc_mnt;
111int proc_fill_super(struct super_block *); 110int proc_fill_super(struct super_block *);
112struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); 111struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
113 112
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index d9396a4fc7ff..927cbd115e53 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -233,7 +233,7 @@ void __init proc_device_tree_init(void)
233 return; 233 return;
234 root = of_find_node_by_path("/"); 234 root = of_find_node_by_path("/");
235 if (root == NULL) { 235 if (root == NULL) {
236 printk(KERN_ERR "/proc/device-tree: can't find root\n"); 236 pr_debug("/proc/device-tree: can't find root\n");
237 return; 237 return;
238 } 238 }
239 proc_device_tree_add_node(root, proc_device_tree); 239 proc_device_tree_add_node(root, proc_device_tree);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 09a1f92a34ef..f50133c11c24 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -32,7 +32,6 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
32 ei->sysctl_entry = table; 32 ei->sysctl_entry = table;
33 33
34 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 34 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
35 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
36 inode->i_mode = table->mode; 35 inode->i_mode = table->mode;
37 if (!table->child) { 36 if (!table->child) {
38 inode->i_mode |= S_IFREG; 37 inode->i_mode |= S_IFREG;
@@ -408,15 +407,18 @@ static int proc_sys_compare(const struct dentry *parent,
408 const struct dentry *dentry, const struct inode *inode, 407 const struct dentry *dentry, const struct inode *inode,
409 unsigned int len, const char *str, const struct qstr *name) 408 unsigned int len, const char *str, const struct qstr *name)
410{ 409{
410 struct ctl_table_header *head;
411 /* Although proc doesn't have negative dentries, rcu-walk means 411 /* Although proc doesn't have negative dentries, rcu-walk means
412 * that inode here can be NULL */ 412 * that inode here can be NULL */
413 /* AV: can it, indeed? */
413 if (!inode) 414 if (!inode)
414 return 0; 415 return 1;
415 if (name->len != len) 416 if (name->len != len)
416 return 1; 417 return 1;
417 if (memcmp(name->name, str, len)) 418 if (memcmp(name->name, str, len))
418 return 1; 419 return 1;
419 return !sysctl_is_seen(PROC_I(inode)->sysctl); 420 head = rcu_dereference(PROC_I(inode)->sysctl);
421 return !head || !sysctl_is_seen(head);
420} 422}
421 423
422static const struct dentry_operations proc_sys_dentry_operations = { 424static const struct dentry_operations proc_sys_dentry_operations = {
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef9fa8e24ad6..a9000e9cfee5 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -43,17 +43,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
43 struct pid_namespace *ns; 43 struct pid_namespace *ns;
44 struct proc_inode *ei; 44 struct proc_inode *ei;
45 45
46 if (proc_mnt) {
47 /* Seed the root directory with a pid so it doesn't need
48 * to be special in base.c. I would do this earlier but
49 * the only task alive when /proc is mounted the first time
50 * is the init_task and it doesn't have any pids.
51 */
52 ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
53 if (!ei->pid)
54 ei->pid = find_get_pid(1);
55 }
56
57 if (flags & MS_KERNMOUNT) 46 if (flags & MS_KERNMOUNT)
58 ns = (struct pid_namespace *)data; 47 ns = (struct pid_namespace *)data;
59 else 48 else
@@ -71,16 +60,16 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
71 return ERR_PTR(err); 60 return ERR_PTR(err);
72 } 61 }
73 62
74 ei = PROC_I(sb->s_root->d_inode);
75 if (!ei->pid) {
76 rcu_read_lock();
77 ei->pid = get_pid(find_pid_ns(1, ns));
78 rcu_read_unlock();
79 }
80
81 sb->s_flags |= MS_ACTIVE; 63 sb->s_flags |= MS_ACTIVE;
82 } 64 }
83 65
66 ei = PROC_I(sb->s_root->d_inode);
67 if (!ei->pid) {
68 rcu_read_lock();
69 ei->pid = get_pid(find_pid_ns(1, ns));
70 rcu_read_unlock();
71 }
72
84 return dget(sb->s_root); 73 return dget(sb->s_root);
85} 74}
86 75
@@ -101,19 +90,20 @@ static struct file_system_type proc_fs_type = {
101 90
102void __init proc_root_init(void) 91void __init proc_root_init(void)
103{ 92{
93 struct vfsmount *mnt;
104 int err; 94 int err;
105 95
106 proc_init_inodecache(); 96 proc_init_inodecache();
107 err = register_filesystem(&proc_fs_type); 97 err = register_filesystem(&proc_fs_type);
108 if (err) 98 if (err)
109 return; 99 return;
110 proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); 100 mnt = kern_mount_data(&proc_fs_type, &init_pid_ns);
111 if (IS_ERR(proc_mnt)) { 101 if (IS_ERR(mnt)) {
112 unregister_filesystem(&proc_fs_type); 102 unregister_filesystem(&proc_fs_type);
113 return; 103 return;
114 } 104 }
115 105
116 init_pid_ns.proc_mnt = proc_mnt; 106 init_pid_ns.proc_mnt = mnt;
117 proc_symlink("mounts", NULL, "self/mounts"); 107 proc_symlink("mounts", NULL, "self/mounts");
118 108
119 proc_net_init(); 109 proc_net_init();
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 60b914860f81..7c708a418acc 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,5 +1,6 @@
1#include <linux/mm.h> 1#include <linux/mm.h>
2#include <linux/hugetlb.h> 2#include <linux/hugetlb.h>
3#include <linux/huge_mm.h>
3#include <linux/mount.h> 4#include <linux/mount.h>
4#include <linux/seq_file.h> 5#include <linux/seq_file.h>
5#include <linux/highmem.h> 6#include <linux/highmem.h>
@@ -7,6 +8,7 @@
7#include <linux/slab.h> 8#include <linux/slab.h>
8#include <linux/pagemap.h> 9#include <linux/pagemap.h>
9#include <linux/mempolicy.h> 10#include <linux/mempolicy.h>
11#include <linux/rmap.h>
10#include <linux/swap.h> 12#include <linux/swap.h>
11#include <linux/swapops.h> 13#include <linux/swapops.h>
12 14
@@ -119,14 +121,14 @@ static void *m_start(struct seq_file *m, loff_t *pos)
119 121
120 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 122 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
121 if (!priv->task) 123 if (!priv->task)
122 return NULL; 124 return ERR_PTR(-ESRCH);
123 125
124 mm = mm_for_maps(priv->task); 126 mm = mm_for_maps(priv->task);
125 if (!mm) 127 if (!mm || IS_ERR(mm))
126 return NULL; 128 return mm;
127 down_read(&mm->mmap_sem); 129 down_read(&mm->mmap_sem);
128 130
129 tail_vma = get_gate_vma(priv->task); 131 tail_vma = get_gate_vma(priv->task->mm);
130 priv->tail_vma = tail_vma; 132 priv->tail_vma = tail_vma;
131 133
132 /* Start with last addr hint */ 134 /* Start with last addr hint */
@@ -249,8 +251,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
249 const char *name = arch_vma_name(vma); 251 const char *name = arch_vma_name(vma);
250 if (!name) { 252 if (!name) {
251 if (mm) { 253 if (mm) {
252 if (vma->vm_start <= mm->start_brk && 254 if (vma->vm_start <= mm->brk &&
253 vma->vm_end >= mm->brk) { 255 vma->vm_end >= mm->start_brk) {
254 name = "[heap]"; 256 name = "[heap]";
255 } else if (vma->vm_start <= mm->start_stack && 257 } else if (vma->vm_start <= mm->start_stack &&
256 vma->vm_end >= mm->start_stack) { 258 vma->vm_end >= mm->start_stack) {
@@ -277,7 +279,8 @@ static int show_map(struct seq_file *m, void *v)
277 show_map_vma(m, vma); 279 show_map_vma(m, vma);
278 280
279 if (m->count < m->size) /* vma is copied successfully */ 281 if (m->count < m->size) /* vma is copied successfully */
280 m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; 282 m->version = (vma != get_gate_vma(task->mm))
283 ? vma->vm_start : 0;
281 return 0; 284 return 0;
282} 285}
283 286
@@ -329,58 +332,86 @@ struct mem_size_stats {
329 unsigned long private_dirty; 332 unsigned long private_dirty;
330 unsigned long referenced; 333 unsigned long referenced;
331 unsigned long anonymous; 334 unsigned long anonymous;
335 unsigned long anonymous_thp;
332 unsigned long swap; 336 unsigned long swap;
333 u64 pss; 337 u64 pss;
334}; 338};
335 339
336static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, 340
337 struct mm_walk *walk) 341static void smaps_pte_entry(pte_t ptent, unsigned long addr,
342 unsigned long ptent_size, struct mm_walk *walk)
338{ 343{
339 struct mem_size_stats *mss = walk->private; 344 struct mem_size_stats *mss = walk->private;
340 struct vm_area_struct *vma = mss->vma; 345 struct vm_area_struct *vma = mss->vma;
341 pte_t *pte, ptent;
342 spinlock_t *ptl;
343 struct page *page; 346 struct page *page;
344 int mapcount; 347 int mapcount;
345 348
346 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 349 if (is_swap_pte(ptent)) {
347 for (; addr != end; pte++, addr += PAGE_SIZE) { 350 mss->swap += ptent_size;
348 ptent = *pte; 351 return;
349 352 }
350 if (is_swap_pte(ptent)) {
351 mss->swap += PAGE_SIZE;
352 continue;
353 }
354 353
355 if (!pte_present(ptent)) 354 if (!pte_present(ptent))
356 continue; 355 return;
356
357 page = vm_normal_page(vma, addr, ptent);
358 if (!page)
359 return;
360
361 if (PageAnon(page))
362 mss->anonymous += ptent_size;
363
364 mss->resident += ptent_size;
365 /* Accumulate the size in pages that have been accessed. */
366 if (pte_young(ptent) || PageReferenced(page))
367 mss->referenced += ptent_size;
368 mapcount = page_mapcount(page);
369 if (mapcount >= 2) {
370 if (pte_dirty(ptent) || PageDirty(page))
371 mss->shared_dirty += ptent_size;
372 else
373 mss->shared_clean += ptent_size;
374 mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
375 } else {
376 if (pte_dirty(ptent) || PageDirty(page))
377 mss->private_dirty += ptent_size;
378 else
379 mss->private_clean += ptent_size;
380 mss->pss += (ptent_size << PSS_SHIFT);
381 }
382}
357 383
358 page = vm_normal_page(vma, addr, ptent); 384static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
359 if (!page) 385 struct mm_walk *walk)
360 continue; 386{
387 struct mem_size_stats *mss = walk->private;
388 struct vm_area_struct *vma = mss->vma;
389 pte_t *pte;
390 spinlock_t *ptl;
361 391
362 if (PageAnon(page)) 392 spin_lock(&walk->mm->page_table_lock);
363 mss->anonymous += PAGE_SIZE; 393 if (pmd_trans_huge(*pmd)) {
364 394 if (pmd_trans_splitting(*pmd)) {
365 mss->resident += PAGE_SIZE; 395 spin_unlock(&walk->mm->page_table_lock);
366 /* Accumulate the size in pages that have been accessed. */ 396 wait_split_huge_page(vma->anon_vma, pmd);
367 if (pte_young(ptent) || PageReferenced(page))
368 mss->referenced += PAGE_SIZE;
369 mapcount = page_mapcount(page);
370 if (mapcount >= 2) {
371 if (pte_dirty(ptent) || PageDirty(page))
372 mss->shared_dirty += PAGE_SIZE;
373 else
374 mss->shared_clean += PAGE_SIZE;
375 mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
376 } else { 397 } else {
377 if (pte_dirty(ptent) || PageDirty(page)) 398 smaps_pte_entry(*(pte_t *)pmd, addr,
378 mss->private_dirty += PAGE_SIZE; 399 HPAGE_PMD_SIZE, walk);
379 else 400 spin_unlock(&walk->mm->page_table_lock);
380 mss->private_clean += PAGE_SIZE; 401 mss->anonymous_thp += HPAGE_PMD_SIZE;
381 mss->pss += (PAGE_SIZE << PSS_SHIFT); 402 return 0;
382 } 403 }
404 } else {
405 spin_unlock(&walk->mm->page_table_lock);
383 } 406 }
407 /*
408 * The mmap_sem held all the way back in m_start() is what
409 * keeps khugepaged out of here and from collapsing things
410 * in here.
411 */
412 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
413 for (; addr != end; pte++, addr += PAGE_SIZE)
414 smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
384 pte_unmap_unlock(pte - 1, ptl); 415 pte_unmap_unlock(pte - 1, ptl);
385 cond_resched(); 416 cond_resched();
386 return 0; 417 return 0;
@@ -416,6 +447,7 @@ static int show_smap(struct seq_file *m, void *v)
416 "Private_Dirty: %8lu kB\n" 447 "Private_Dirty: %8lu kB\n"
417 "Referenced: %8lu kB\n" 448 "Referenced: %8lu kB\n"
418 "Anonymous: %8lu kB\n" 449 "Anonymous: %8lu kB\n"
450 "AnonHugePages: %8lu kB\n"
419 "Swap: %8lu kB\n" 451 "Swap: %8lu kB\n"
420 "KernelPageSize: %8lu kB\n" 452 "KernelPageSize: %8lu kB\n"
421 "MMUPageSize: %8lu kB\n" 453 "MMUPageSize: %8lu kB\n"
@@ -429,6 +461,7 @@ static int show_smap(struct seq_file *m, void *v)
429 mss.private_dirty >> 10, 461 mss.private_dirty >> 10,
430 mss.referenced >> 10, 462 mss.referenced >> 10,
431 mss.anonymous >> 10, 463 mss.anonymous >> 10,
464 mss.anonymous_thp >> 10,
432 mss.swap >> 10, 465 mss.swap >> 10,
433 vma_kernel_pagesize(vma) >> 10, 466 vma_kernel_pagesize(vma) >> 10,
434 vma_mmu_pagesize(vma) >> 10, 467 vma_mmu_pagesize(vma) >> 10,
@@ -436,7 +469,8 @@ static int show_smap(struct seq_file *m, void *v)
436 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); 469 (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
437 470
438 if (m->count < m->size) /* vma is copied successfully */ 471 if (m->count < m->size) /* vma is copied successfully */
439 m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; 472 m->version = (vma != get_gate_vma(task->mm))
473 ? vma->vm_start : 0;
440 return 0; 474 return 0;
441} 475}
442 476
@@ -467,6 +501,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
467 spinlock_t *ptl; 501 spinlock_t *ptl;
468 struct page *page; 502 struct page *page;
469 503
504 split_huge_page_pmd(walk->mm, pmd);
505
470 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 506 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
471 for (; addr != end; pte++, addr += PAGE_SIZE) { 507 for (; addr != end; pte++, addr += PAGE_SIZE) {
472 ptent = *pte; 508 ptent = *pte;
@@ -623,6 +659,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
623 pte_t *pte; 659 pte_t *pte;
624 int err = 0; 660 int err = 0;
625 661
662 split_huge_page_pmd(walk->mm, pmd);
663
626 /* find the first VMA at or above 'addr' */ 664 /* find the first VMA at or above 'addr' */
627 vma = find_vma(walk->mm, addr); 665 vma = find_vma(walk->mm, addr);
628 for (; addr != end; addr += PAGE_SIZE) { 666 for (; addr != end; addr += PAGE_SIZE) {
@@ -728,8 +766,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
728 if (!task) 766 if (!task)
729 goto out; 767 goto out;
730 768
731 ret = -EACCES; 769 mm = mm_for_maps(task);
732 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 770 ret = PTR_ERR(mm);
771 if (!mm || IS_ERR(mm))
733 goto out_task; 772 goto out_task;
734 773
735 ret = -EINVAL; 774 ret = -EINVAL;
@@ -742,10 +781,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
742 if (!count) 781 if (!count)
743 goto out_task; 782 goto out_task;
744 783
745 mm = get_task_mm(task);
746 if (!mm)
747 goto out_task;
748
749 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); 784 pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
750 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); 785 pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
751 ret = -ENOMEM; 786 ret = -ENOMEM;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index b535d3e5d5f1..980de547c070 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos)
199 /* pin the task and mm whilst we play with them */ 199 /* pin the task and mm whilst we play with them */
200 priv->task = get_pid_task(priv->pid, PIDTYPE_PID); 200 priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
201 if (!priv->task) 201 if (!priv->task)
202 return NULL; 202 return ERR_PTR(-ESRCH);
203 203
204 mm = mm_for_maps(priv->task); 204 mm = mm_for_maps(priv->task);
205 if (!mm) { 205 if (!mm || IS_ERR(mm)) {
206 put_task_struct(priv->task); 206 put_task_struct(priv->task);
207 priv->task = NULL; 207 priv->task = NULL;
208 return NULL; 208 return mm;
209 } 209 }
210 down_read(&mm->mmap_sem); 210 down_read(&mm->mmap_sem);
211 211
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
new file mode 100644
index 000000000000..867d0ac026ce
--- /dev/null
+++ b/fs/pstore/Kconfig
@@ -0,0 +1,13 @@
1config PSTORE
2 bool "Persistent store support"
3 default n
4 help
5 This option enables generic access to platform level
6 persistent storage via "pstore" filesystem that can
7 be mounted as /dev/pstore. Only useful if you have
8 a platform level driver that registers with pstore to
9 provide the data, so you probably should just go say "Y"
10 (or "M") to a platform specific persistent store driver
11 (e.g. ACPI_APEI on X86) which will select this for you.
12 If you don't have a platform persistent store driver,
13 say N.
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
new file mode 100644
index 000000000000..760f4bce7d1d
--- /dev/null
+++ b/fs/pstore/Makefile
@@ -0,0 +1,7 @@
1#
2# Makefile for the linux pstorefs routines.
3#
4
5obj-y += pstore.o
6
7pstore-objs += inode.o platform.o
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
new file mode 100644
index 000000000000..977ed2723845
--- /dev/null
+++ b/fs/pstore/inode.c
@@ -0,0 +1,311 @@
1/*
2 * Persistent Storage - ramfs parts.
3 *
4 * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/module.h>
21#include <linux/fs.h>
22#include <linux/fsnotify.h>
23#include <linux/pagemap.h>
24#include <linux/highmem.h>
25#include <linux/time.h>
26#include <linux/init.h>
27#include <linux/string.h>
28#include <linux/mount.h>
29#include <linux/ramfs.h>
30#include <linux/parser.h>
31#include <linux/sched.h>
32#include <linux/magic.h>
33#include <linux/pstore.h>
34#include <linux/slab.h>
35#include <linux/uaccess.h>
36
37#include "internal.h"
38
39#define PSTORE_NAMELEN 64
40
41struct pstore_private {
42 u64 id;
43 int (*erase)(u64);
44 ssize_t size;
45 char data[];
46};
47
48static int pstore_file_open(struct inode *inode, struct file *file)
49{
50 file->private_data = inode->i_private;
51 return 0;
52}
53
54static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
55 size_t count, loff_t *ppos)
56{
57 struct pstore_private *ps = file->private_data;
58
59 return simple_read_from_buffer(userbuf, count, ppos, ps->data, ps->size);
60}
61
62static const struct file_operations pstore_file_operations = {
63 .open = pstore_file_open,
64 .read = pstore_file_read,
65 .llseek = default_llseek,
66};
67
68/*
69 * When a file is unlinked from our file system we call the
70 * platform driver to erase the record from persistent store.
71 */
72static int pstore_unlink(struct inode *dir, struct dentry *dentry)
73{
74 struct pstore_private *p = dentry->d_inode->i_private;
75
76 p->erase(p->id);
77
78 return simple_unlink(dir, dentry);
79}
80
81static void pstore_evict_inode(struct inode *inode)
82{
83 end_writeback(inode);
84 kfree(inode->i_private);
85}
86
87static const struct inode_operations pstore_dir_inode_operations = {
88 .lookup = simple_lookup,
89 .unlink = pstore_unlink,
90};
91
92static struct inode *pstore_get_inode(struct super_block *sb,
93 const struct inode *dir, int mode, dev_t dev)
94{
95 struct inode *inode = new_inode(sb);
96
97 if (inode) {
98 inode->i_ino = get_next_ino();
99 inode->i_uid = inode->i_gid = 0;
100 inode->i_mode = mode;
101 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
102 switch (mode & S_IFMT) {
103 case S_IFREG:
104 inode->i_fop = &pstore_file_operations;
105 break;
106 case S_IFDIR:
107 inode->i_op = &pstore_dir_inode_operations;
108 inode->i_fop = &simple_dir_operations;
109 inc_nlink(inode);
110 break;
111 }
112 }
113 return inode;
114}
115
116enum {
117 Opt_kmsg_bytes, Opt_err
118};
119
120static const match_table_t tokens = {
121 {Opt_kmsg_bytes, "kmsg_bytes=%u"},
122 {Opt_err, NULL}
123};
124
125static void parse_options(char *options)
126{
127 char *p;
128 substring_t args[MAX_OPT_ARGS];
129 int option;
130
131 if (!options)
132 return;
133
134 while ((p = strsep(&options, ",")) != NULL) {
135 int token;
136
137 if (!*p)
138 continue;
139
140 token = match_token(p, tokens, args);
141 switch (token) {
142 case Opt_kmsg_bytes:
143 if (!match_int(&args[0], &option))
144 pstore_set_kmsg_bytes(option);
145 break;
146 }
147 }
148}
149
/*
 * remount: the only thing that can change is the option string, so
 * re-parse it.  @sb and @flags are left untouched.  Always returns 0.
 */
static int pstore_remount(struct super_block *sb, int *flags, char *data)
{
	char *options = data;

	parse_options(options);

	return 0;
}
156
157static const struct super_operations pstore_ops = {
158 .statfs = simple_statfs,
159 .drop_inode = generic_delete_inode,
160 .evict_inode = pstore_evict_inode,
161 .remount_fs = pstore_remount,
162 .show_options = generic_show_options,
163};
164
/*
 * The superblock of the pstore mount: set by pstore_fill_super(),
 * cleared by pstore_kill_sb(), NULL otherwise.
 */
static struct super_block *pstore_sb;

/* Returns non-zero when pstore_sb is set, i.e. the filesystem is mounted. */
int pstore_is_mounted(void)
{
	return pstore_sb ? 1 : 0;
}
171
172/*
173 * Make a regular file in the root directory of our file system.
174 * Load it up with "size" bytes of data from "buf".
175 * Set the mtime & ctime to the date that this record was originally stored.
176 */
177int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
178 char *data, size_t size,
179 struct timespec time, int (*erase)(u64))
180{
181 struct dentry *root = pstore_sb->s_root;
182 struct dentry *dentry;
183 struct inode *inode;
184 int rc;
185 char name[PSTORE_NAMELEN];
186 struct pstore_private *private;
187
188 rc = -ENOMEM;
189 inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0);
190 if (!inode)
191 goto fail;
192 private = kmalloc(sizeof *private + size, GFP_KERNEL);
193 if (!private)
194 goto fail_alloc;
195 private->id = id;
196 private->erase = erase;
197
198 switch (type) {
199 case PSTORE_TYPE_DMESG:
200 sprintf(name, "dmesg-%s-%lld", psname, id);
201 break;
202 case PSTORE_TYPE_MCE:
203 sprintf(name, "mce-%s-%lld", psname, id);
204 break;
205 case PSTORE_TYPE_UNKNOWN:
206 sprintf(name, "unknown-%s-%lld", psname, id);
207 break;
208 default:
209 sprintf(name, "type%d-%s-%lld", type, psname, id);
210 break;
211 }
212
213 mutex_lock(&root->d_inode->i_mutex);
214
215 rc = -ENOSPC;
216 dentry = d_alloc_name(root, name);
217 if (IS_ERR(dentry))
218 goto fail_lockedalloc;
219
220 memcpy(private->data, data, size);
221 inode->i_size = private->size = size;
222
223 inode->i_private = private;
224
225 if (time.tv_sec)
226 inode->i_mtime = inode->i_ctime = time;
227
228 d_add(dentry, inode);
229
230 mutex_unlock(&root->d_inode->i_mutex);
231
232 return 0;
233
234fail_lockedalloc:
235 mutex_unlock(&root->d_inode->i_mutex);
236 kfree(private);
237fail_alloc:
238 iput(inode);
239
240fail:
241 return rc;
242}
243
244int pstore_fill_super(struct super_block *sb, void *data, int silent)
245{
246 struct inode *inode = NULL;
247 struct dentry *root;
248 int err;
249
250 save_mount_options(sb, data);
251
252 pstore_sb = sb;
253
254 sb->s_maxbytes = MAX_LFS_FILESIZE;
255 sb->s_blocksize = PAGE_CACHE_SIZE;
256 sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
257 sb->s_magic = PSTOREFS_MAGIC;
258 sb->s_op = &pstore_ops;
259 sb->s_time_gran = 1;
260
261 parse_options(data);
262
263 inode = pstore_get_inode(sb, NULL, S_IFDIR | 0755, 0);
264 if (!inode) {
265 err = -ENOMEM;
266 goto fail;
267 }
268 /* override ramfs "dir" options so we catch unlink(2) */
269 inode->i_op = &pstore_dir_inode_operations;
270
271 root = d_alloc_root(inode);
272 sb->s_root = root;
273 if (!root) {
274 err = -ENOMEM;
275 goto fail;
276 }
277
278 pstore_get_records();
279
280 return 0;
281fail:
282 iput(inode);
283 return err;
284}
285
286static struct dentry *pstore_mount(struct file_system_type *fs_type,
287 int flags, const char *dev_name, void *data)
288{
289 return mount_single(fs_type, flags, data, pstore_fill_super);
290}
291
292static void pstore_kill_sb(struct super_block *sb)
293{
294 kill_litter_super(sb);
295 pstore_sb = NULL;
296}
297
298static struct file_system_type pstore_fs_type = {
299 .name = "pstore",
300 .mount = pstore_mount,
301 .kill_sb = pstore_kill_sb,
302};
303
304static int __init init_pstore_fs(void)
305{
306 return register_filesystem(&pstore_fs_type);
307}
308module_init(init_pstore_fs)
309
310MODULE_AUTHOR("Tony Luck <tony.luck@intel.com>");
311MODULE_LICENSE("GPL");
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
new file mode 100644
index 000000000000..8c9f23eb1645
--- /dev/null
+++ b/fs/pstore/internal.h
@@ -0,0 +1,6 @@
1extern void pstore_set_kmsg_bytes(int);
2extern void pstore_get_records(void);
3extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
4 char *data, size_t size,
5 struct timespec time, int (*erase)(u64));
6extern int pstore_is_mounted(void);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
new file mode 100644
index 000000000000..ce9ad84d5dd9
--- /dev/null
+++ b/fs/pstore/platform.c
@@ -0,0 +1,192 @@
1/*
2 * Persistent Storage - platform driver interface parts.
3 *
4 * Copyright (C) 2010 Intel Corporation <tony.luck@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20#include <linux/atomic.h>
21#include <linux/types.h>
22#include <linux/errno.h>
23#include <linux/init.h>
24#include <linux/kmsg_dump.h>
25#include <linux/module.h>
26#include <linux/pstore.h>
27#include <linux/string.h>
28#include <linux/slab.h>
29#include <linux/uaccess.h>
30
31#include "internal.h"
32
/*
 * pstore_lock just protects "psinfo" during
 * calls to pstore_register()
 */
static DEFINE_SPINLOCK(pstore_lock);

/* The registered platform backend, or NULL when none has registered. */
static struct pstore_info *psinfo;

/* How much of the console log to snapshot */
static unsigned long kmsg_bytes = 10240;

/*
 * Set the snapshot limit consulted by pstore_dump().  The value is
 * stored as given, converted to unsigned long.
 */
void pstore_set_kmsg_bytes(int bytes)
{
	unsigned long limit = bytes;

	kmsg_bytes = limit;
}
47
48/* Tag each group of saved records with a sequence number */
49static int oopscount;
50
51/*
52 * callback from kmsg_dump. (s2,l2) has the most recently
53 * written bytes, older bytes are in (s1,l1). Save as much
54 * as we can from the end of the buffer.
55 */
56static void pstore_dump(struct kmsg_dumper *dumper,
57 enum kmsg_dump_reason reason,
58 const char *s1, unsigned long l1,
59 const char *s2, unsigned long l2)
60{
61 unsigned long s1_start, s2_start;
62 unsigned long l1_cpy, l2_cpy;
63 unsigned long size, total = 0;
64 char *dst;
65 u64 id;
66 int hsize, part = 1;
67
68 mutex_lock(&psinfo->buf_mutex);
69 oopscount++;
70 while (total < kmsg_bytes) {
71 dst = psinfo->buf;
72 hsize = sprintf(dst, "Oops#%d Part%d\n", oopscount, part++);
73 size = psinfo->bufsize - hsize;
74 dst += hsize;
75
76 l2_cpy = min(l2, size);
77 l1_cpy = min(l1, size - l2_cpy);
78
79 if (l1_cpy + l2_cpy == 0)
80 break;
81
82 s2_start = l2 - l2_cpy;
83 s1_start = l1 - l1_cpy;
84
85 memcpy(dst, s1 + s1_start, l1_cpy);
86 memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
87
88 id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy);
89 if (pstore_is_mounted())
90 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
91 psinfo->buf, hsize + l1_cpy + l2_cpy,
92 CURRENT_TIME, psinfo->erase);
93 l1 -= l1_cpy;
94 l2 -= l2_cpy;
95 total += l1_cpy + l2_cpy;
96 }
97 mutex_unlock(&psinfo->buf_mutex);
98}
99
100static struct kmsg_dumper pstore_dumper = {
101 .dump = pstore_dump,
102};
103
104/*
105 * platform specific persistent storage driver registers with
106 * us here. If pstore is already mounted, call the platform
107 * read function right away to populate the file system. If not
108 * then the pstore mount code will call us later to fill out
109 * the file system.
110 *
111 * Register with kmsg_dump to save last part of console log on panic.
112 */
113int pstore_register(struct pstore_info *psi)
114{
115 struct module *owner = psi->owner;
116
117 spin_lock(&pstore_lock);
118 if (psinfo) {
119 spin_unlock(&pstore_lock);
120 return -EBUSY;
121 }
122 psinfo = psi;
123 spin_unlock(&pstore_lock);
124
125 if (owner && !try_module_get(owner)) {
126 psinfo = NULL;
127 return -EINVAL;
128 }
129
130 if (pstore_is_mounted())
131 pstore_get_records();
132
133 kmsg_dump_register(&pstore_dumper);
134
135 return 0;
136}
137EXPORT_SYMBOL_GPL(pstore_register);
138
139/*
140 * Read all the records from the persistent store. Create and
141 * file files in our filesystem.
142 */
143void pstore_get_records(void)
144{
145 struct pstore_info *psi = psinfo;
146 size_t size;
147 u64 id;
148 enum pstore_type_id type;
149 struct timespec time;
150 int failed = 0;
151
152 if (!psi)
153 return;
154
155 mutex_lock(&psinfo->buf_mutex);
156 while ((size = psi->read(&id, &type, &time)) > 0) {
157 if (pstore_mkfile(type, psi->name, id, psi->buf, size,
158 time, psi->erase))
159 failed++;
160 }
161 mutex_unlock(&psinfo->buf_mutex);
162
163 if (failed)
164 printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
165 failed, psi->name);
166}
167
168/*
169 * Call platform driver to write a record to the
170 * persistent store.
171 */
172int pstore_write(enum pstore_type_id type, char *buf, size_t size)
173{
174 u64 id;
175
176 if (!psinfo)
177 return -ENODEV;
178
179 if (size > psinfo->bufsize)
180 return -EFBIG;
181
182 mutex_lock(&psinfo->buf_mutex);
183 memcpy(psinfo->buf, buf, size);
184 id = psinfo->write(type, size);
185 if (pstore_is_mounted())
186 pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
187 size, CURRENT_TIME, psinfo->erase);
188 mutex_unlock(&psinfo->buf_mutex);
189
190 return 0;
191}
192EXPORT_SYMBOL_GPL(pstore_write);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 65444d29406b..f1ab3604db5a 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -112,7 +112,7 @@ static int v2_read_file_info(struct super_block *sb, int type)
112 if (!info->dqi_priv) { 112 if (!info->dqi_priv) {
113 printk(KERN_WARNING 113 printk(KERN_WARNING
114 "Not enough memory for quota information structure.\n"); 114 "Not enough memory for quota information structure.\n");
115 return -1; 115 return -ENOMEM;
116 } 116 }
117 qinfo = info->dqi_priv; 117 qinfo = info->dqi_priv;
118 if (version == 0) { 118 if (version == 0) {
diff --git a/fs/reiserfs/Makefile b/fs/reiserfs/Makefile
index 792b3cb2cd18..3c3b00165114 100644
--- a/fs/reiserfs/Makefile
+++ b/fs/reiserfs/Makefile
@@ -31,9 +31,7 @@ endif
31# and causing a panic. Since this behavior only affects ppc32, this ifeq 31# and causing a panic. Since this behavior only affects ppc32, this ifeq
32# will work around it. If any other architecture displays this behavior, 32# will work around it. If any other architecture displays this behavior,
33# add it here. 33# add it here.
34ifeq ($(CONFIG_PPC32),y) 34ccflags-$(CONFIG_PPC32) := $(call cc-ifversion, -lt, 0400, -O1)
35EXTRA_CFLAGS := $(call cc-ifversion, -lt, 0400, -O1)
36endif
37 35
38TAGS: 36TAGS:
39 etags *.c 37 etags *.c
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 0bae036831e2..1bba24bad820 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
1593 struct inode *inode = dentry->d_inode; 1593 struct inode *inode = dentry->d_inode;
1594 int maxlen = *lenp; 1594 int maxlen = *lenp;
1595 1595
1596 if (maxlen < 3) 1596 if (need_parent && (maxlen < 5)) {
1597 *lenp = 5;
1597 return 255; 1598 return 255;
1599 } else if (maxlen < 3) {
1600 *lenp = 3;
1601 return 255;
1602 }
1598 1603
1599 data[0] = inode->i_ino; 1604 data[0] = inode->i_ino;
1600 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); 1605 data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 79265fdc317a..4e153051bc75 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -59,7 +59,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
59 if (err) 59 if (err)
60 break; 60 break;
61 61
62 if (!is_owner_or_cap(inode)) { 62 if (!inode_owner_or_capable(inode)) {
63 err = -EPERM; 63 err = -EPERM;
64 goto setflags_out; 64 goto setflags_out;
65 } 65 }
@@ -103,7 +103,7 @@ setflags_out:
103 err = put_user(inode->i_generation, (int __user *)arg); 103 err = put_user(inode->i_generation, (int __user *)arg);
104 break; 104 break;
105 case REISERFS_IOC_SETVERSION: 105 case REISERFS_IOC_SETVERSION:
106 if (!is_owner_or_cap(inode)) { 106 if (!inode_owner_or_capable(inode)) {
107 err = -EPERM; 107 err = -EPERM;
108 break; 108 break;
109 } 109 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3eea859e6990..c77514bd5776 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
2876 reiserfs_mounted_fs_count++; 2876 reiserfs_mounted_fs_count++;
2877 if (reiserfs_mounted_fs_count <= 1) { 2877 if (reiserfs_mounted_fs_count <= 1) {
2878 reiserfs_write_unlock(sb); 2878 reiserfs_write_unlock(sb);
2879 commit_wq = create_workqueue("reiserfs"); 2879 commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0);
2880 reiserfs_write_lock(sb); 2880 reiserfs_write_lock(sb);
2881 } 2881 }
2882 2882
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ba5f51ec3458..118662690cdf 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -593,7 +593,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, int mode,
593 new_inode_init(inode, dir, mode); 593 new_inode_init(inode, dir, mode);
594 594
595 jbegin_count += reiserfs_cache_default_acl(dir); 595 jbegin_count += reiserfs_cache_default_acl(dir);
596 retval = reiserfs_security_init(dir, inode, &security); 596 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
597 if (retval < 0) { 597 if (retval < 0) {
598 drop_new_inode(inode); 598 drop_new_inode(inode);
599 return retval; 599 return retval;
@@ -667,7 +667,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
667 new_inode_init(inode, dir, mode); 667 new_inode_init(inode, dir, mode);
668 668
669 jbegin_count += reiserfs_cache_default_acl(dir); 669 jbegin_count += reiserfs_cache_default_acl(dir);
670 retval = reiserfs_security_init(dir, inode, &security); 670 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
671 if (retval < 0) { 671 if (retval < 0) {
672 drop_new_inode(inode); 672 drop_new_inode(inode);
673 return retval; 673 return retval;
@@ -747,7 +747,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
747 new_inode_init(inode, dir, mode); 747 new_inode_init(inode, dir, mode);
748 748
749 jbegin_count += reiserfs_cache_default_acl(dir); 749 jbegin_count += reiserfs_cache_default_acl(dir);
750 retval = reiserfs_security_init(dir, inode, &security); 750 retval = reiserfs_security_init(dir, inode, &dentry->d_name, &security);
751 if (retval < 0) { 751 if (retval < 0) {
752 drop_new_inode(inode); 752 drop_new_inode(inode);
753 return retval; 753 return retval;
@@ -771,7 +771,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, 771 EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE,
772 dentry, inode, &security); 772 dentry, inode, &security);
773 if (retval) { 773 if (retval) {
774 dir->i_nlink--; 774 DEC_DIR_INODE_NLINK(dir)
775 goto out_failed; 775 goto out_failed;
776 } 776 }
777 777
@@ -1032,7 +1032,8 @@ static int reiserfs_symlink(struct inode *parent_dir,
1032 } 1032 }
1033 new_inode_init(inode, parent_dir, mode); 1033 new_inode_init(inode, parent_dir, mode);
1034 1034
1035 retval = reiserfs_security_init(parent_dir, inode, &security); 1035 retval = reiserfs_security_init(parent_dir, inode, &dentry->d_name,
1036 &security);
1036 if (retval < 0) { 1037 if (retval < 0) {
1037 drop_new_inode(inode); 1038 drop_new_inode(inode);
1038 return retval; 1039 return retval;
@@ -1122,10 +1123,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
1122 reiserfs_write_unlock(dir->i_sb); 1123 reiserfs_write_unlock(dir->i_sb);
1123 return -EMLINK; 1124 return -EMLINK;
1124 } 1125 }
1125 if (inode->i_nlink == 0) {
1126 reiserfs_write_unlock(dir->i_sb);
1127 return -ENOENT;
1128 }
1129 1126
1130 /* inc before scheduling so reiserfs_unlink knows we are here */ 1127 /* inc before scheduling so reiserfs_unlink knows we are here */
1131 inc_nlink(inode); 1128 inc_nlink(inode);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 3cfb2e933644..5c11ca82b782 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -978,8 +978,6 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
978 978
979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 979static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
980{ 980{
981 if (nd->flags & LOOKUP_RCU)
982 return -ECHILD;
983 return -EPERM; 981 return -EPERM;
984} 982}
985 983
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 90d2fcb67a31..3dc38f1206fc 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -26,7 +26,7 @@ posix_acl_set(struct dentry *dentry, const char *name, const void *value,
26 size_t jcreate_blocks; 26 size_t jcreate_blocks;
27 if (!reiserfs_posixacl(inode->i_sb)) 27 if (!reiserfs_posixacl(inode->i_sb))
28 return -EOPNOTSUPP; 28 return -EOPNOTSUPP;
29 if (!is_owner_or_cap(inode)) 29 if (!inode_owner_or_capable(inode))
30 return -EPERM; 30 return -EPERM;
31 31
32 if (value) { 32 if (value) {
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 237c6928d3c6..ef66c18a9332 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -54,6 +54,7 @@ static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
54 * of blocks needed for the transaction. If successful, reiserfs_security 54 * of blocks needed for the transaction. If successful, reiserfs_security
55 * must be released using reiserfs_security_free when the caller is done. */ 55 * must be released using reiserfs_security_free when the caller is done. */
56int reiserfs_security_init(struct inode *dir, struct inode *inode, 56int reiserfs_security_init(struct inode *dir, struct inode *inode,
57 const struct qstr *qstr,
57 struct reiserfs_security_handle *sec) 58 struct reiserfs_security_handle *sec)
58{ 59{
59 int blocks = 0; 60 int blocks = 0;
@@ -65,7 +66,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
65 if (IS_PRIVATE(dir)) 66 if (IS_PRIVATE(dir))
66 return 0; 67 return 0;
67 68
68 error = security_inode_init_security(inode, dir, &sec->name, 69 error = security_inode_init_security(inode, dir, qstr, &sec->name,
69 &sec->value, &sec->length); 70 &sec->value, &sec->length);
70 if (error) { 71 if (error) {
71 if (error == -EOPNOTSUPP) 72 if (error == -EOPNOTSUPP)
diff --git a/fs/select.c b/fs/select.c
index e56560d2b08a..d33418fdc858 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -517,9 +517,6 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
517 * Update: ERESTARTSYS breaks at least the xview clock binary, so 517 * Update: ERESTARTSYS breaks at least the xview clock binary, so
518 * I'm trying ERESTARTNOHAND which restart only when you want to. 518 * I'm trying ERESTARTNOHAND which restart only when you want to.
519 */ 519 */
520#define MAX_SELECT_SECONDS \
521 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
522
523int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 520int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
524 fd_set __user *exp, struct timespec *end_time) 521 fd_set __user *exp, struct timespec *end_time)
525{ 522{
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index aa68a8a31518..efc309fa3035 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -5,12 +5,12 @@ config SQUASHFS
5 help 5 help
6 Saying Y here includes support for SquashFS 4.0 (a Compressed 6 Saying Y here includes support for SquashFS 4.0 (a Compressed
7 Read-Only File System). Squashfs is a highly compressed read-only 7 Read-Only File System). Squashfs is a highly compressed read-only
8 filesystem for Linux. It uses zlib/lzo compression to compress both 8 filesystem for Linux. It uses zlib, lzo or xz compression to
9 files, inodes and directories. Inodes in the system are very small 9 compress both files, inodes and directories. Inodes in the system
10 and all blocks are packed to minimise data overhead. Block sizes 10 are very small and all blocks are packed to minimise data overhead.
11 greater than 4K are supported up to a maximum of 1 Mbytes (default 11 Block sizes greater than 4K are supported up to a maximum of 1 Mbytes
12 block size 128K). SquashFS 4.0 supports 64 bit filesystems and files 12 (default block size 128K). SquashFS 4.0 supports 64 bit filesystems
13 (larger than 4GB), full uid/gid information, hard links and 13 and files (larger than 4GB), full uid/gid information, hard links and
14 timestamps. 14 timestamps.
15 15
16 Squashfs is intended for general read-only filesystem use, for 16 Squashfs is intended for general read-only filesystem use, for
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index a5940e54c4dd..e921bd213738 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -23,6 +23,7 @@
23 23
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/mutex.h> 25#include <linux/mutex.h>
26#include <linux/slab.h>
26#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
27 28
28#include "squashfs_fs.h" 29#include "squashfs_fs.h"
@@ -74,3 +75,36 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id)
74 75
75 return decompressor[i]; 76 return decompressor[i];
76} 77}
78
79
80void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags)
81{
82 struct squashfs_sb_info *msblk = sb->s_fs_info;
83 void *strm, *buffer = NULL;
84 int length = 0;
85
86 /*
87 * Read decompressor specific options from file system if present
88 */
89 if (SQUASHFS_COMP_OPTS(flags)) {
90 buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
91 if (buffer == NULL)
92 return ERR_PTR(-ENOMEM);
93
94 length = squashfs_read_data(sb, &buffer,
95 sizeof(struct squashfs_super_block), 0, NULL,
96 PAGE_CACHE_SIZE, 1);
97
98 if (length < 0) {
99 strm = ERR_PTR(length);
100 goto finished;
101 }
102 }
103
104 strm = msblk->decompressor->init(msblk, buffer, length);
105
106finished:
107 kfree(buffer);
108
109 return strm;
110}
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 3b305a70f7aa..099745ad5691 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -24,7 +24,7 @@
24 */ 24 */
25 25
26struct squashfs_decompressor { 26struct squashfs_decompressor {
27 void *(*init)(struct squashfs_sb_info *); 27 void *(*init)(struct squashfs_sb_info *, void *, int);
28 void (*free)(void *); 28 void (*free)(void *);
29 int (*decompress)(struct squashfs_sb_info *, void **, 29 int (*decompress)(struct squashfs_sb_info *, void **,
30 struct buffer_head **, int, int, int, int, int); 30 struct buffer_head **, int, int, int, int, int);
@@ -33,11 +33,6 @@ struct squashfs_decompressor {
33 int supported; 33 int supported;
34}; 34};
35 35
36static inline void *squashfs_decompressor_init(struct squashfs_sb_info *msblk)
37{
38 return msblk->decompressor->init(msblk);
39}
40
41static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, 36static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk,
42 void *s) 37 void *s)
43{ 38{
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c
index 0dc340aa2be9..3f79cd1d0c19 100644
--- a/fs/squashfs/dir.c
+++ b/fs/squashfs/dir.c
@@ -172,6 +172,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
172 length += sizeof(dirh); 172 length += sizeof(dirh);
173 173
174 dir_count = le32_to_cpu(dirh.count) + 1; 174 dir_count = le32_to_cpu(dirh.count) + 1;
175
176 /* dir_count should never be larger than 256 */
177 if (dir_count > 256)
178 goto failed_read;
179
175 while (dir_count--) { 180 while (dir_count--) {
176 /* 181 /*
177 * Read directory entry. 182 * Read directory entry.
@@ -183,6 +188,10 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir)
183 188
184 size = le16_to_cpu(dire->size) + 1; 189 size = le16_to_cpu(dire->size) + 1;
185 190
191 /* size should never be larger than SQUASHFS_NAME_LEN */
192 if (size > SQUASHFS_NAME_LEN)
193 goto failed_read;
194
186 err = squashfs_read_metadata(inode->i_sb, dire->name, 195 err = squashfs_read_metadata(inode->i_sb, dire->name,
187 &block, &offset, size); 196 &block, &offset, size);
188 if (err < 0) 197 if (err < 0)
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 7da759e34c52..00f4dfc5f088 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -37,7 +37,7 @@ struct squashfs_lzo {
37 void *output; 37 void *output;
38}; 38};
39 39
40static void *lzo_init(struct squashfs_sb_info *msblk) 40static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len)
41{ 41{
42 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); 42 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE);
43 43
@@ -58,7 +58,7 @@ failed2:
58failed: 58failed:
59 ERROR("Failed to allocate lzo workspace\n"); 59 ERROR("Failed to allocate lzo workspace\n");
60 kfree(stream); 60 kfree(stream);
61 return NULL; 61 return ERR_PTR(-ENOMEM);
62} 62}
63 63
64 64
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 7a9464d08cf6..5d922a6701ab 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -176,6 +176,11 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
176 length += sizeof(dirh); 176 length += sizeof(dirh);
177 177
178 dir_count = le32_to_cpu(dirh.count) + 1; 178 dir_count = le32_to_cpu(dirh.count) + 1;
179
180 /* dir_count should never be larger than 256 */
181 if (dir_count > 256)
182 goto data_error;
183
179 while (dir_count--) { 184 while (dir_count--) {
180 /* 185 /*
181 * Read directory entry. 186 * Read directory entry.
@@ -187,6 +192,10 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
187 192
188 size = le16_to_cpu(dire->size) + 1; 193 size = le16_to_cpu(dire->size) + 1;
189 194
195 /* size should never be larger than SQUASHFS_NAME_LEN */
196 if (size > SQUASHFS_NAME_LEN)
197 goto data_error;
198
190 err = squashfs_read_metadata(dir->i_sb, dire->name, 199 err = squashfs_read_metadata(dir->i_sb, dire->name,
191 &block, &offset, size); 200 &block, &offset, size);
192 if (err < 0) 201 if (err < 0)
@@ -228,6 +237,9 @@ exit_lookup:
228 d_add(dentry, inode); 237 d_add(dentry, inode);
229 return ERR_PTR(0); 238 return ERR_PTR(0);
230 239
240data_error:
241 err = -EIO;
242
231read_failure: 243read_failure:
232 ERROR("Unable to read directory block [%llx:%x]\n", 244 ERROR("Unable to read directory block [%llx:%x]\n",
233 squashfs_i(dir)->start + msblk->directory_table, 245 squashfs_i(dir)->start + msblk->directory_table,
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index ba729d808876..1f2e608b8785 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -48,6 +48,7 @@ extern int squashfs_read_table(struct super_block *, void *, u64, int);
48 48
49/* decompressor.c */ 49/* decompressor.c */
50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); 50extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int);
51extern void *squashfs_decompressor_init(struct super_block *, unsigned short);
51 52
52/* export.c */ 53/* export.c */
53extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, 54extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64,
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index 39533feffd6d..4582c568ef4d 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -57,6 +57,7 @@
57#define SQUASHFS_ALWAYS_FRAG 5 57#define SQUASHFS_ALWAYS_FRAG 5
58#define SQUASHFS_DUPLICATE 6 58#define SQUASHFS_DUPLICATE 6
59#define SQUASHFS_EXPORT 7 59#define SQUASHFS_EXPORT 7
60#define SQUASHFS_COMP_OPT 10
60 61
61#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) 62#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1)
62 63
@@ -81,6 +82,9 @@
81#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \ 82#define SQUASHFS_EXPORTABLE(flags) SQUASHFS_BIT(flags, \
82 SQUASHFS_EXPORT) 83 SQUASHFS_EXPORT)
83 84
85#define SQUASHFS_COMP_OPTS(flags) SQUASHFS_BIT(flags, \
86 SQUASHFS_COMP_OPT)
87
84/* Max number of types and file types */ 88/* Max number of types and file types */
85#define SQUASHFS_DIR_TYPE 1 89#define SQUASHFS_DIR_TYPE 1
86#define SQUASHFS_REG_TYPE 2 90#define SQUASHFS_REG_TYPE 2
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 20700b9f2b4c..5c8184c061a4 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -199,10 +199,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
199 199
200 err = -ENOMEM; 200 err = -ENOMEM;
201 201
202 msblk->stream = squashfs_decompressor_init(msblk);
203 if (msblk->stream == NULL)
204 goto failed_mount;
205
206 msblk->block_cache = squashfs_cache_init("metadata", 202 msblk->block_cache = squashfs_cache_init("metadata",
207 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE); 203 SQUASHFS_CACHED_BLKS, SQUASHFS_METADATA_SIZE);
208 if (msblk->block_cache == NULL) 204 if (msblk->block_cache == NULL)
@@ -215,6 +211,13 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
215 goto failed_mount; 211 goto failed_mount;
216 } 212 }
217 213
214 msblk->stream = squashfs_decompressor_init(sb, flags);
215 if (IS_ERR(msblk->stream)) {
216 err = PTR_ERR(msblk->stream);
217 msblk->stream = NULL;
218 goto failed_mount;
219 }
220
218 /* Allocate and read id index table */ 221 /* Allocate and read id index table */
219 msblk->id_table = squashfs_read_id_index_table(sb, 222 msblk->id_table = squashfs_read_id_index_table(sb,
220 le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids)); 223 le64_to_cpu(sblk->id_table_start), le16_to_cpu(sblk->no_ids));
@@ -370,8 +373,8 @@ static void squashfs_put_super(struct super_block *sb)
370} 373}
371 374
372 375
373static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags, 376static struct dentry *squashfs_mount(struct file_system_type *fs_type,
374 const char *dev_name, void *data) 377 int flags, const char *dev_name, void *data)
375{ 378{
376 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); 379 return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
377} 380}
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c4eb40018256..aa47a286d1f8 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -26,10 +26,10 @@
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/xz.h> 28#include <linux/xz.h>
29#include <linux/bitops.h>
29 30
30#include "squashfs_fs.h" 31#include "squashfs_fs.h"
31#include "squashfs_fs_sb.h" 32#include "squashfs_fs_sb.h"
32#include "squashfs_fs_i.h"
33#include "squashfs.h" 33#include "squashfs.h"
34#include "decompressor.h" 34#include "decompressor.h"
35 35
@@ -38,24 +38,57 @@ struct squashfs_xz {
38 struct xz_buf buf; 38 struct xz_buf buf;
39}; 39};
40 40
41static void *squashfs_xz_init(struct squashfs_sb_info *msblk) 41struct comp_opts {
42 __le32 dictionary_size;
43 __le32 flags;
44};
45
46static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff,
47 int len)
42{ 48{
43 int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); 49 struct comp_opts *comp_opts = buff;
50 struct squashfs_xz *stream;
51 int dict_size = msblk->block_size;
52 int err, n;
53
54 if (comp_opts) {
55 /* check compressor options are the expected length */
56 if (len < sizeof(*comp_opts)) {
57 err = -EIO;
58 goto failed;
59 }
44 60
45 struct squashfs_xz *stream = kmalloc(sizeof(*stream), GFP_KERNEL); 61 dict_size = le32_to_cpu(comp_opts->dictionary_size);
46 if (stream == NULL) 62
63 /* the dictionary size should be 2^n or 2^n+2^(n+1) */
64 n = ffs(dict_size) - 1;
65 if (dict_size != (1 << n) && dict_size != (1 << n) +
66 (1 << (n + 1))) {
67 err = -EIO;
68 goto failed;
69 }
70 }
71
72 dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE);
73
74 stream = kmalloc(sizeof(*stream), GFP_KERNEL);
75 if (stream == NULL) {
76 err = -ENOMEM;
47 goto failed; 77 goto failed;
78 }
48 79
49 stream->state = xz_dec_init(XZ_PREALLOC, block_size); 80 stream->state = xz_dec_init(XZ_PREALLOC, dict_size);
50 if (stream->state == NULL) 81 if (stream->state == NULL) {
82 kfree(stream);
83 err = -ENOMEM;
51 goto failed; 84 goto failed;
85 }
52 86
53 return stream; 87 return stream;
54 88
55failed: 89failed:
56 ERROR("Failed to allocate xz workspace\n"); 90 ERROR("Failed to initialise xz decompressor\n");
57 kfree(stream); 91 return ERR_PTR(err);
58 return NULL;
59} 92}
60 93
61 94
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 4661ae2b1cec..517688b32ffa 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -26,19 +26,19 @@
26#include <linux/buffer_head.h> 26#include <linux/buffer_head.h>
27#include <linux/slab.h> 27#include <linux/slab.h>
28#include <linux/zlib.h> 28#include <linux/zlib.h>
29#include <linux/vmalloc.h>
29 30
30#include "squashfs_fs.h" 31#include "squashfs_fs.h"
31#include "squashfs_fs_sb.h" 32#include "squashfs_fs_sb.h"
32#include "squashfs.h" 33#include "squashfs.h"
33#include "decompressor.h" 34#include "decompressor.h"
34 35
35static void *zlib_init(struct squashfs_sb_info *dummy) 36static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len)
36{ 37{
37 z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); 38 z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL);
38 if (stream == NULL) 39 if (stream == NULL)
39 goto failed; 40 goto failed;
40 stream->workspace = kmalloc(zlib_inflate_workspacesize(), 41 stream->workspace = vmalloc(zlib_inflate_workspacesize());
41 GFP_KERNEL);
42 if (stream->workspace == NULL) 42 if (stream->workspace == NULL)
43 goto failed; 43 goto failed;
44 44
@@ -47,7 +47,7 @@ static void *zlib_init(struct squashfs_sb_info *dummy)
47failed: 47failed:
48 ERROR("Failed to allocate zlib workspace\n"); 48 ERROR("Failed to allocate zlib workspace\n");
49 kfree(stream); 49 kfree(stream);
50 return NULL; 50 return ERR_PTR(-ENOMEM);
51} 51}
52 52
53 53
@@ -56,7 +56,7 @@ static void zlib_free(void *strm)
56 z_stream *stream = strm; 56 z_stream *stream = strm;
57 57
58 if (stream) 58 if (stream)
59 kfree(stream->workspace); 59 vfree(stream->workspace);
60 kfree(stream); 60 kfree(stream);
61} 61}
62 62
diff --git a/fs/stat.c b/fs/stat.c
index d5c61cf2b703..961039121cb8 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
75 int error = -EINVAL; 75 int error = -EINVAL;
76 int lookup_flags = 0; 76 int lookup_flags = 0;
77 77
78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) 78 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT |
79 AT_EMPTY_PATH)) != 0)
79 goto out; 80 goto out;
80 81
81 if (!(flag & AT_SYMLINK_NOFOLLOW)) 82 if (!(flag & AT_SYMLINK_NOFOLLOW))
82 lookup_flags |= LOOKUP_FOLLOW; 83 lookup_flags |= LOOKUP_FOLLOW;
83 if (flag & AT_NO_AUTOMOUNT) 84 if (flag & AT_NO_AUTOMOUNT)
84 lookup_flags |= LOOKUP_NO_AUTOMOUNT; 85 lookup_flags |= LOOKUP_NO_AUTOMOUNT;
86 if (flag & AT_EMPTY_PATH)
87 lookup_flags |= LOOKUP_EMPTY;
85 88
86 error = user_path_at(dfd, filename, lookup_flags, &path); 89 error = user_path_at(dfd, filename, lookup_flags, &path);
87 if (error) 90 if (error)
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
297 if (bufsiz <= 0) 300 if (bufsiz <= 0)
298 return -EINVAL; 301 return -EINVAL;
299 302
300 error = user_path_at(dfd, pathname, 0, &path); 303 error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path);
301 if (!error) { 304 if (!error) {
302 struct inode *inode = path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
303 306
diff --git a/fs/statfs.c b/fs/statfs.c
index 30ea8c8a996b..8244924dec55 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf)
73} 73}
74EXPORT_SYMBOL(vfs_statfs); 74EXPORT_SYMBOL(vfs_statfs);
75 75
76static int do_statfs_native(struct path *path, struct statfs *buf) 76int user_statfs(const char __user *pathname, struct kstatfs *st)
77{ 77{
78 struct kstatfs st; 78 struct path path;
79 int retval; 79 int error = user_path(pathname, &path);
80 if (!error) {
81 error = vfs_statfs(&path, st);
82 path_put(&path);
83 }
84 return error;
85}
80 86
81 retval = vfs_statfs(path, &st); 87int fd_statfs(int fd, struct kstatfs *st)
82 if (retval) 88{
83 return retval; 89 struct file *file = fget(fd);
90 int error = -EBADF;
91 if (file) {
92 error = vfs_statfs(&file->f_path, st);
93 fput(file);
94 }
95 return error;
96}
84 97
85 if (sizeof(*buf) == sizeof(st)) 98static int do_statfs_native(struct kstatfs *st, struct statfs __user *p)
86 memcpy(buf, &st, sizeof(st)); 99{
100 struct statfs buf;
101
102 if (sizeof(buf) == sizeof(*st))
103 memcpy(&buf, st, sizeof(*st));
87 else { 104 else {
88 if (sizeof buf->f_blocks == 4) { 105 if (sizeof buf.f_blocks == 4) {
89 if ((st.f_blocks | st.f_bfree | st.f_bavail | 106 if ((st->f_blocks | st->f_bfree | st->f_bavail |
90 st.f_bsize | st.f_frsize) & 107 st->f_bsize | st->f_frsize) &
91 0xffffffff00000000ULL) 108 0xffffffff00000000ULL)
92 return -EOVERFLOW; 109 return -EOVERFLOW;
93 /* 110 /*
94 * f_files and f_ffree may be -1; it's okay to stuff 111 * f_files and f_ffree may be -1; it's okay to stuff
95 * that into 32 bits 112 * that into 32 bits
96 */ 113 */
97 if (st.f_files != -1 && 114 if (st->f_files != -1 &&
98 (st.f_files & 0xffffffff00000000ULL)) 115 (st->f_files & 0xffffffff00000000ULL))
99 return -EOVERFLOW; 116 return -EOVERFLOW;
100 if (st.f_ffree != -1 && 117 if (st->f_ffree != -1 &&
101 (st.f_ffree & 0xffffffff00000000ULL)) 118 (st->f_ffree & 0xffffffff00000000ULL))
102 return -EOVERFLOW; 119 return -EOVERFLOW;
103 } 120 }
104 121
105 buf->f_type = st.f_type; 122 buf.f_type = st->f_type;
106 buf->f_bsize = st.f_bsize; 123 buf.f_bsize = st->f_bsize;
107 buf->f_blocks = st.f_blocks; 124 buf.f_blocks = st->f_blocks;
108 buf->f_bfree = st.f_bfree; 125 buf.f_bfree = st->f_bfree;
109 buf->f_bavail = st.f_bavail; 126 buf.f_bavail = st->f_bavail;
110 buf->f_files = st.f_files; 127 buf.f_files = st->f_files;
111 buf->f_ffree = st.f_ffree; 128 buf.f_ffree = st->f_ffree;
112 buf->f_fsid = st.f_fsid; 129 buf.f_fsid = st->f_fsid;
113 buf->f_namelen = st.f_namelen; 130 buf.f_namelen = st->f_namelen;
114 buf->f_frsize = st.f_frsize; 131 buf.f_frsize = st->f_frsize;
115 buf->f_flags = st.f_flags; 132 buf.f_flags = st->f_flags;
116 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 133 memset(buf.f_spare, 0, sizeof(buf.f_spare));
117 } 134 }
135 if (copy_to_user(p, &buf, sizeof(buf)))
136 return -EFAULT;
118 return 0; 137 return 0;
119} 138}
120 139
121static int do_statfs64(struct path *path, struct statfs64 *buf) 140static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p)
122{ 141{
123 struct kstatfs st; 142 struct statfs64 buf;
124 int retval; 143 if (sizeof(buf) == sizeof(*st))
125 144 memcpy(&buf, st, sizeof(*st));
126 retval = vfs_statfs(path, &st);
127 if (retval)
128 return retval;
129
130 if (sizeof(*buf) == sizeof(st))
131 memcpy(buf, &st, sizeof(st));
132 else { 145 else {
133 buf->f_type = st.f_type; 146 buf.f_type = st->f_type;
134 buf->f_bsize = st.f_bsize; 147 buf.f_bsize = st->f_bsize;
135 buf->f_blocks = st.f_blocks; 148 buf.f_blocks = st->f_blocks;
136 buf->f_bfree = st.f_bfree; 149 buf.f_bfree = st->f_bfree;
137 buf->f_bavail = st.f_bavail; 150 buf.f_bavail = st->f_bavail;
138 buf->f_files = st.f_files; 151 buf.f_files = st->f_files;
139 buf->f_ffree = st.f_ffree; 152 buf.f_ffree = st->f_ffree;
140 buf->f_fsid = st.f_fsid; 153 buf.f_fsid = st->f_fsid;
141 buf->f_namelen = st.f_namelen; 154 buf.f_namelen = st->f_namelen;
142 buf->f_frsize = st.f_frsize; 155 buf.f_frsize = st->f_frsize;
143 buf->f_flags = st.f_flags; 156 buf.f_flags = st->f_flags;
144 memset(buf->f_spare, 0, sizeof(buf->f_spare)); 157 memset(buf.f_spare, 0, sizeof(buf.f_spare));
145 } 158 }
159 if (copy_to_user(p, &buf, sizeof(buf)))
160 return -EFAULT;
146 return 0; 161 return 0;
147} 162}
148 163
149SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) 164SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf)
150{ 165{
151 struct path path; 166 struct kstatfs st;
152 int error; 167 int error = user_statfs(pathname, &st);
153 168 if (!error)
154 error = user_path(pathname, &path); 169 error = do_statfs_native(&st, buf);
155 if (!error) {
156 struct statfs tmp;
157 error = do_statfs_native(&path, &tmp);
158 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
159 error = -EFAULT;
160 path_put(&path);
161 }
162 return error; 170 return error;
163} 171}
164 172
165SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) 173SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf)
166{ 174{
167 struct path path; 175 struct kstatfs st;
168 long error; 176 int error;
169
170 if (sz != sizeof(*buf)) 177 if (sz != sizeof(*buf))
171 return -EINVAL; 178 return -EINVAL;
172 error = user_path(pathname, &path); 179 error = user_statfs(pathname, &st);
173 if (!error) { 180 if (!error)
174 struct statfs64 tmp; 181 error = do_statfs64(&st, buf);
175 error = do_statfs64(&path, &tmp);
176 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
177 error = -EFAULT;
178 path_put(&path);
179 }
180 return error; 182 return error;
181} 183}
182 184
183SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) 185SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf)
184{ 186{
185 struct file *file; 187 struct kstatfs st;
186 struct statfs tmp; 188 int error = fd_statfs(fd, &st);
187 int error; 189 if (!error)
188 190 error = do_statfs_native(&st, buf);
189 error = -EBADF;
190 file = fget(fd);
191 if (!file)
192 goto out;
193 error = do_statfs_native(&file->f_path, &tmp);
194 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
195 error = -EFAULT;
196 fput(file);
197out:
198 return error; 191 return error;
199} 192}
200 193
201SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) 194SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf)
202{ 195{
203 struct file *file; 196 struct kstatfs st;
204 struct statfs64 tmp;
205 int error; 197 int error;
206 198
207 if (sz != sizeof(*buf)) 199 if (sz != sizeof(*buf))
208 return -EINVAL; 200 return -EINVAL;
209 201
210 error = -EBADF; 202 error = fd_statfs(fd, &st);
211 file = fget(fd); 203 if (!error)
212 if (!file) 204 error = do_statfs64(&st, buf);
213 goto out;
214 error = do_statfs64(&file->f_path, &tmp);
215 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
216 error = -EFAULT;
217 fput(file);
218out:
219 return error; 205 return error;
220} 206}
221 207
diff --git a/fs/super.c b/fs/super.c
index 7e9dd4cc2c01..e84864908264 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -843,23 +843,6 @@ error:
843} 843}
844EXPORT_SYMBOL(mount_bdev); 844EXPORT_SYMBOL(mount_bdev);
845 845
846int get_sb_bdev(struct file_system_type *fs_type,
847 int flags, const char *dev_name, void *data,
848 int (*fill_super)(struct super_block *, void *, int),
849 struct vfsmount *mnt)
850{
851 struct dentry *root;
852
853 root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
854 if (IS_ERR(root))
855 return PTR_ERR(root);
856 mnt->mnt_root = root;
857 mnt->mnt_sb = root->d_sb;
858 return 0;
859}
860
861EXPORT_SYMBOL(get_sb_bdev);
862
863void kill_block_super(struct super_block *sb) 846void kill_block_super(struct super_block *sb)
864{ 847{
865 struct block_device *bdev = sb->s_bdev; 848 struct block_device *bdev = sb->s_bdev;
@@ -897,22 +880,6 @@ struct dentry *mount_nodev(struct file_system_type *fs_type,
897} 880}
898EXPORT_SYMBOL(mount_nodev); 881EXPORT_SYMBOL(mount_nodev);
899 882
900int get_sb_nodev(struct file_system_type *fs_type,
901 int flags, void *data,
902 int (*fill_super)(struct super_block *, void *, int),
903 struct vfsmount *mnt)
904{
905 struct dentry *root;
906
907 root = mount_nodev(fs_type, flags, data, fill_super);
908 if (IS_ERR(root))
909 return PTR_ERR(root);
910 mnt->mnt_root = root;
911 mnt->mnt_sb = root->d_sb;
912 return 0;
913}
914EXPORT_SYMBOL(get_sb_nodev);
915
916static int compare_single(struct super_block *s, void *p) 883static int compare_single(struct super_block *s, void *p)
917{ 884{
918 return 1; 885 return 1;
@@ -943,69 +910,35 @@ struct dentry *mount_single(struct file_system_type *fs_type,
943} 910}
944EXPORT_SYMBOL(mount_single); 911EXPORT_SYMBOL(mount_single);
945 912
946int get_sb_single(struct file_system_type *fs_type, 913struct dentry *
947 int flags, void *data, 914mount_fs(struct file_system_type *type, int flags, const char *name, void *data)
948 int (*fill_super)(struct super_block *, void *, int),
949 struct vfsmount *mnt)
950{
951 struct dentry *root;
952 root = mount_single(fs_type, flags, data, fill_super);
953 if (IS_ERR(root))
954 return PTR_ERR(root);
955 mnt->mnt_root = root;
956 mnt->mnt_sb = root->d_sb;
957 return 0;
958}
959
960EXPORT_SYMBOL(get_sb_single);
961
962struct vfsmount *
963vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
964{ 915{
965 struct vfsmount *mnt;
966 struct dentry *root; 916 struct dentry *root;
917 struct super_block *sb;
967 char *secdata = NULL; 918 char *secdata = NULL;
968 int error; 919 int error = -ENOMEM;
969
970 if (!type)
971 return ERR_PTR(-ENODEV);
972
973 error = -ENOMEM;
974 mnt = alloc_vfsmnt(name);
975 if (!mnt)
976 goto out;
977
978 if (flags & MS_KERNMOUNT)
979 mnt->mnt_flags = MNT_INTERNAL;
980 920
981 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { 921 if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
982 secdata = alloc_secdata(); 922 secdata = alloc_secdata();
983 if (!secdata) 923 if (!secdata)
984 goto out_mnt; 924 goto out;
985 925
986 error = security_sb_copy_data(data, secdata); 926 error = security_sb_copy_data(data, secdata);
987 if (error) 927 if (error)
988 goto out_free_secdata; 928 goto out_free_secdata;
989 } 929 }
990 930
991 if (type->mount) { 931 root = type->mount(type, flags, name, data);
992 root = type->mount(type, flags, name, data); 932 if (IS_ERR(root)) {
993 if (IS_ERR(root)) { 933 error = PTR_ERR(root);
994 error = PTR_ERR(root); 934 goto out_free_secdata;
995 goto out_free_secdata;
996 }
997 mnt->mnt_root = root;
998 mnt->mnt_sb = root->d_sb;
999 } else {
1000 error = type->get_sb(type, flags, name, data, mnt);
1001 if (error < 0)
1002 goto out_free_secdata;
1003 } 935 }
1004 BUG_ON(!mnt->mnt_sb); 936 sb = root->d_sb;
1005 WARN_ON(!mnt->mnt_sb->s_bdi); 937 BUG_ON(!sb);
1006 mnt->mnt_sb->s_flags |= MS_BORN; 938 WARN_ON(!sb->s_bdi);
939 sb->s_flags |= MS_BORN;
1007 940
1008 error = security_sb_kern_mount(mnt->mnt_sb, flags, secdata); 941 error = security_sb_kern_mount(sb, flags, secdata);
1009 if (error) 942 if (error)
1010 goto out_sb; 943 goto out_sb;
1011 944
@@ -1016,27 +949,21 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
1016 * violate this rule. This warning should be either removed or 949 * violate this rule. This warning should be either removed or
1017 * converted to a BUG() in 2.6.34. 950 * converted to a BUG() in 2.6.34.
1018 */ 951 */
1019 WARN((mnt->mnt_sb->s_maxbytes < 0), "%s set sb->s_maxbytes to " 952 WARN((sb->s_maxbytes < 0), "%s set sb->s_maxbytes to "
1020 "negative value (%lld)\n", type->name, mnt->mnt_sb->s_maxbytes); 953 "negative value (%lld)\n", type->name, sb->s_maxbytes);
1021 954
1022 mnt->mnt_mountpoint = mnt->mnt_root; 955 up_write(&sb->s_umount);
1023 mnt->mnt_parent = mnt;
1024 up_write(&mnt->mnt_sb->s_umount);
1025 free_secdata(secdata); 956 free_secdata(secdata);
1026 return mnt; 957 return root;
1027out_sb: 958out_sb:
1028 dput(mnt->mnt_root); 959 dput(root);
1029 deactivate_locked_super(mnt->mnt_sb); 960 deactivate_locked_super(sb);
1030out_free_secdata: 961out_free_secdata:
1031 free_secdata(secdata); 962 free_secdata(secdata);
1032out_mnt:
1033 free_vfsmnt(mnt);
1034out: 963out:
1035 return ERR_PTR(error); 964 return ERR_PTR(error);
1036} 965}
1037 966
1038EXPORT_SYMBOL_GPL(vfs_kern_mount);
1039
1040/** 967/**
1041 * freeze_super - lock the filesystem and force it into a consistent state 968 * freeze_super - lock the filesystem and force it into a consistent state
1042 * @sb: the super to lock 969 * @sb: the super to lock
@@ -1126,49 +1053,3 @@ out:
1126 return 0; 1053 return 0;
1127} 1054}
1128EXPORT_SYMBOL(thaw_super); 1055EXPORT_SYMBOL(thaw_super);
1129
1130static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1131{
1132 int err;
1133 const char *subtype = strchr(fstype, '.');
1134 if (subtype) {
1135 subtype++;
1136 err = -EINVAL;
1137 if (!subtype[0])
1138 goto err;
1139 } else
1140 subtype = "";
1141
1142 mnt->mnt_sb->s_subtype = kstrdup(subtype, GFP_KERNEL);
1143 err = -ENOMEM;
1144 if (!mnt->mnt_sb->s_subtype)
1145 goto err;
1146 return mnt;
1147
1148 err:
1149 mntput(mnt);
1150 return ERR_PTR(err);
1151}
1152
1153struct vfsmount *
1154do_kern_mount(const char *fstype, int flags, const char *name, void *data)
1155{
1156 struct file_system_type *type = get_fs_type(fstype);
1157 struct vfsmount *mnt;
1158 if (!type)
1159 return ERR_PTR(-ENODEV);
1160 mnt = vfs_kern_mount(type, flags, name, data);
1161 if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
1162 !mnt->mnt_sb->s_subtype)
1163 mnt = fs_set_subtype(mnt, fstype);
1164 put_filesystem(type);
1165 return mnt;
1166}
1167EXPORT_SYMBOL_GPL(do_kern_mount);
1168
1169struct vfsmount *kern_mount_data(struct file_system_type *type, void *data)
1170{
1171 return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data);
1172}
1173
1174EXPORT_SYMBOL_GPL(kern_mount_data);
diff --git a/fs/sync.c b/fs/sync.c
index ba76b9623e7e..92ca208777d5 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -7,6 +7,7 @@
7#include <linux/fs.h> 7#include <linux/fs.h>
8#include <linux/slab.h> 8#include <linux/slab.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/namei.h>
10#include <linux/sched.h> 11#include <linux/sched.h>
11#include <linux/writeback.h> 12#include <linux/writeback.h>
12#include <linux/syscalls.h> 13#include <linux/syscalls.h>
@@ -128,6 +129,29 @@ void emergency_sync(void)
128 } 129 }
129} 130}
130 131
132/*
133 * sync a single super
134 */
135SYSCALL_DEFINE1(syncfs, int, fd)
136{
137 struct file *file;
138 struct super_block *sb;
139 int ret;
140 int fput_needed;
141
142 file = fget_light(fd, &fput_needed);
143 if (!file)
144 return -EBADF;
145 sb = file->f_dentry->d_sb;
146
147 down_read(&sb->s_umount);
148 ret = sync_filesystem(sb);
149 up_read(&sb->s_umount);
150
151 fput_light(file, fput_needed);
152 return ret;
153}
154
131/** 155/**
132 * vfs_fsync_range - helper to sync a range of data & metadata to disk 156 * vfs_fsync_range - helper to sync a range of data & metadata to disk
133 * @file: file to sync 157 * @file: file to sync
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index b427b1208c26..e474fbcf8bde 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -245,7 +245,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
245 new_de = sysv_find_entry(new_dentry, &new_page); 245 new_de = sysv_find_entry(new_dentry, &new_page);
246 if (!new_de) 246 if (!new_de)
247 goto out_dir; 247 goto out_dir;
248 inode_inc_link_count(old_inode);
249 sysv_set_link(new_de, new_page, old_inode); 248 sysv_set_link(new_de, new_page, old_inode);
250 new_inode->i_ctime = CURRENT_TIME_SEC; 249 new_inode->i_ctime = CURRENT_TIME_SEC;
251 if (dir_de) 250 if (dir_de)
@@ -257,18 +256,15 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
257 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max) 256 if (new_dir->i_nlink >= SYSV_SB(new_dir->i_sb)->s_link_max)
258 goto out_dir; 257 goto out_dir;
259 } 258 }
260 inode_inc_link_count(old_inode);
261 err = sysv_add_link(new_dentry, old_inode); 259 err = sysv_add_link(new_dentry, old_inode);
262 if (err) { 260 if (err)
263 inode_dec_link_count(old_inode);
264 goto out_dir; 261 goto out_dir;
265 }
266 if (dir_de) 262 if (dir_de)
267 inode_inc_link_count(new_dir); 263 inode_inc_link_count(new_dir);
268 } 264 }
269 265
270 sysv_delete_entry(old_de, old_page); 266 sysv_delete_entry(old_de, old_page);
271 inode_dec_link_count(old_inode); 267 mark_inode_dirty(old_inode);
272 268
273 if (dir_de) { 269 if (dir_de) {
274 sysv_set_link(dir_de, dir_page, new_dir); 270 sysv_set_link(dir_de, dir_page, new_dir);
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 830e3f76f442..1d1859dc3de5 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -44,23 +44,20 @@ config UBIFS_FS_ZLIB
44 44
45# Debugging-related stuff 45# Debugging-related stuff
46config UBIFS_FS_DEBUG 46config UBIFS_FS_DEBUG
47 bool "Enable debugging" 47 bool "Enable debugging support"
48 depends on UBIFS_FS 48 depends on UBIFS_FS
49 select DEBUG_FS 49 select DEBUG_FS
50 select KALLSYMS_ALL 50 select KALLSYMS_ALL
51 help 51 help
52 This option enables UBIFS debugging. 52 This option enables UBIFS debugging support. It makes sure various
53 53 assertions, self-checks, debugging messages and test modes are compiled
54config UBIFS_FS_DEBUG_MSG_LVL 54 in (this all is compiled out otherwise). Assertions are light-weight
55 int "Default message level (0 = no extra messages, 3 = lots)" 55 and this option also enables them. Self-checks, debugging messages and
56 depends on UBIFS_FS_DEBUG 56 test modes are switched off by default. Thus, it is safe and actually
57 default "0" 57 recommended to have debugging support enabled, and it should not slow
58 help 58 down UBIFS. You can then further enable / disable individual debugging
59 This controls the amount of debugging messages produced by UBIFS. 59 features using UBIFS module parameters and the corresponding sysfs
60 If reporting bugs, please try to have available a full dump of the 60 interfaces.
61 messages at level 1 while the misbehaviour was occurring. Level 2
62 may become necessary if level 1 messages were not enough to find the
63 bug. Generally Level 3 should be avoided.
64 61
65config UBIFS_FS_DEBUG_CHKS 62config UBIFS_FS_DEBUG_CHKS
66 bool "Enable extra checks" 63 bool "Enable extra checks"
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 02429d81ca33..b148fbc80f8d 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -48,6 +48,56 @@
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include "ubifs.h" 49#include "ubifs.h"
50 50
51/*
52 * nothing_to_commit - check if there is nothing to commit.
53 * @c: UBIFS file-system description object
54 *
55 * This is a helper function which checks if there is anything to commit. It is
56 * used as an optimization to avoid starting the commit if it is not really
57 * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
58 * writing the commit start node to the log), and it is better to avoid doing
59 * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
60 * nothing to commit, it is more optimal to avoid any flash I/O.
61 *
62 * This function has to be called with @c->commit_sem locked for writing -
63 * this function does not take LPT/TNC locks because the @c->commit_sem
64 * guarantees that we have exclusive access to the TNC and LPT data structures.
65 *
66 * This function returns %1 if there is nothing to commit and %0 otherwise.
67 */
68static int nothing_to_commit(struct ubifs_info *c)
69{
70 /*
71 * During mounting or remounting from R/O mode to R/W mode we may
72 * commit for various recovery-related reasons.
73 */
74 if (c->mounting || c->remounting_rw)
75 return 0;
76
77 /*
78 * If the root TNC node is dirty, we definitely have something to
79 * commit.
80 */
81 if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags))
82 return 0;
83
84 /*
85 * Even though the TNC is clean, the LPT tree may have dirty nodes. For
86 * example, this may happen if the budgeting subsystem invoked GC to
87 * make some free space, and the GC found an LEB with only dirty and
88 * free space. In this case GC would just change the lprops of this
89 * LEB (by turning all space into free space) and unmap it.
90 */
91 if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
92 return 0;
93
94 ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
95 ubifs_assert(c->dirty_pn_cnt == 0);
96 ubifs_assert(c->dirty_nn_cnt == 0);
97
98 return 1;
99}
100
51/** 101/**
52 * do_commit - commit the journal. 102 * do_commit - commit the journal.
53 * @c: UBIFS file-system description object 103 * @c: UBIFS file-system description object
@@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c)
70 goto out_up; 120 goto out_up;
71 } 121 }
72 122
123 if (nothing_to_commit(c)) {
124 up_write(&c->commit_sem);
125 err = 0;
126 goto out_cancel;
127 }
128
73 /* Sync all write buffers (necessary for recovery) */ 129 /* Sync all write buffers (necessary for recovery) */
74 for (i = 0; i < c->jhead_cnt; i++) { 130 for (i = 0; i < c->jhead_cnt; i++) {
75 err = ubifs_wbuf_sync(&c->jheads[i].wbuf); 131 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
@@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c)
162 if (err) 218 if (err)
163 goto out; 219 goto out;
164 220
221out_cancel:
165 spin_lock(&c->cs_lock); 222 spin_lock(&c->cs_lock);
166 c->cmt_state = COMMIT_RESTING; 223 c->cmt_state = COMMIT_RESTING;
167 wake_up(&c->cmt_wq); 224 wake_up(&c->cmt_wq);
168 dbg_cmt("commit end"); 225 dbg_cmt("commit end");
169 spin_unlock(&c->cs_lock); 226 spin_unlock(&c->cs_lock);
170
171 return 0; 227 return 0;
172 228
173out_up: 229out_up:
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 0bee4dbffc31..01c2b028e525 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -43,8 +43,8 @@ DEFINE_SPINLOCK(dbg_lock);
43static char dbg_key_buf0[128]; 43static char dbg_key_buf0[128];
44static char dbg_key_buf1[128]; 44static char dbg_key_buf1[128];
45 45
46unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; 46unsigned int ubifs_msg_flags;
47unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; 47unsigned int ubifs_chk_flags;
48unsigned int ubifs_tst_flags; 48unsigned int ubifs_tst_flags;
49 49
50module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); 50module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
@@ -810,16 +810,24 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
810{ 810{
811 struct ubifs_scan_leb *sleb; 811 struct ubifs_scan_leb *sleb;
812 struct ubifs_scan_node *snod; 812 struct ubifs_scan_node *snod;
813 void *buf;
813 814
814 if (dbg_failure_mode) 815 if (dbg_failure_mode)
815 return; 816 return;
816 817
817 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 818 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
818 current->pid, lnum); 819 current->pid, lnum);
819 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 820
821 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
822 if (!buf) {
823 ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
824 return;
825 }
826
827 sleb = ubifs_scan(c, lnum, 0, buf, 0);
820 if (IS_ERR(sleb)) { 828 if (IS_ERR(sleb)) {
821 ubifs_err("scan error %d", (int)PTR_ERR(sleb)); 829 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
822 return; 830 goto out;
823 } 831 }
824 832
825 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, 833 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
@@ -835,6 +843,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
835 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 843 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
836 current->pid, lnum); 844 current->pid, lnum);
837 ubifs_scan_destroy(sleb); 845 ubifs_scan_destroy(sleb);
846
847out:
848 vfree(buf);
838 return; 849 return;
839} 850}
840 851
@@ -2690,16 +2701,8 @@ int ubifs_debugging_init(struct ubifs_info *c)
2690 if (!c->dbg) 2701 if (!c->dbg)
2691 return -ENOMEM; 2702 return -ENOMEM;
2692 2703
2693 c->dbg->buf = vmalloc(c->leb_size);
2694 if (!c->dbg->buf)
2695 goto out;
2696
2697 failure_mode_init(c); 2704 failure_mode_init(c);
2698 return 0; 2705 return 0;
2699
2700out:
2701 kfree(c->dbg);
2702 return -ENOMEM;
2703} 2706}
2704 2707
2705/** 2708/**
@@ -2709,7 +2712,6 @@ out:
2709void ubifs_debugging_exit(struct ubifs_info *c) 2712void ubifs_debugging_exit(struct ubifs_info *c)
2710{ 2713{
2711 failure_mode_exit(c); 2714 failure_mode_exit(c);
2712 vfree(c->dbg->buf);
2713 kfree(c->dbg); 2715 kfree(c->dbg);
2714} 2716}
2715 2717
@@ -2813,19 +2815,19 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2813 } 2815 }
2814 2816
2815 fname = "dump_lprops"; 2817 fname = "dump_lprops";
2816 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); 2818 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2817 if (IS_ERR(dent)) 2819 if (IS_ERR(dent))
2818 goto out_remove; 2820 goto out_remove;
2819 d->dfs_dump_lprops = dent; 2821 d->dfs_dump_lprops = dent;
2820 2822
2821 fname = "dump_budg"; 2823 fname = "dump_budg";
2822 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); 2824 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2823 if (IS_ERR(dent)) 2825 if (IS_ERR(dent))
2824 goto out_remove; 2826 goto out_remove;
2825 d->dfs_dump_budg = dent; 2827 d->dfs_dump_budg = dent;
2826 2828
2827 fname = "dump_tnc"; 2829 fname = "dump_tnc";
2828 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); 2830 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2829 if (IS_ERR(dent)) 2831 if (IS_ERR(dent))
2830 goto out_remove; 2832 goto out_remove;
2831 d->dfs_dump_tnc = dent; 2833 d->dfs_dump_tnc = dent;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 69ebe4729151..919f0de29d8f 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -27,7 +27,6 @@
27 27
28/** 28/**
29 * ubifs_debug_info - per-FS debugging information. 29 * ubifs_debug_info - per-FS debugging information.
30 * @buf: a buffer of LEB size, used for various purposes
31 * @old_zroot: old index root - used by 'dbg_check_old_index()' 30 * @old_zroot: old index root - used by 'dbg_check_old_index()'
32 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' 31 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
33 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' 32 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
@@ -54,7 +53,6 @@
54 * dfs_dump_tnc: "dump TNC" debugfs knob 53 * dfs_dump_tnc: "dump TNC" debugfs knob
55 */ 54 */
56struct ubifs_debug_info { 55struct ubifs_debug_info {
57 void *buf;
58 struct ubifs_zbranch old_zroot; 56 struct ubifs_zbranch old_zroot;
59 int old_zroot_level; 57 int old_zroot_level;
60 unsigned long long old_zroot_sqnum; 58 unsigned long long old_zroot_sqnum;
@@ -173,7 +171,7 @@ const char *dbg_key_str1(const struct ubifs_info *c,
173#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 171#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
174 172
175/* 173/*
176 * Debugging message type flags (must match msg_type_names in debug.c). 174 * Debugging message type flags.
177 * 175 *
178 * UBIFS_MSG_GEN: general messages 176 * UBIFS_MSG_GEN: general messages
179 * UBIFS_MSG_JNL: journal messages 177 * UBIFS_MSG_JNL: journal messages
@@ -205,14 +203,8 @@ enum {
205 UBIFS_MSG_RCVRY = 0x1000, 203 UBIFS_MSG_RCVRY = 0x1000,
206}; 204};
207 205
208/* Debugging message type flags for each default debug message level */
209#define UBIFS_MSG_LVL_0 0
210#define UBIFS_MSG_LVL_1 0x1
211#define UBIFS_MSG_LVL_2 0x7f
212#define UBIFS_MSG_LVL_3 0xffff
213
214/* 206/*
215 * Debugging check flags (must match chk_names in debug.c). 207 * Debugging check flags.
216 * 208 *
217 * UBIFS_CHK_GEN: general checks 209 * UBIFS_CHK_GEN: general checks
218 * UBIFS_CHK_TNC: check TNC 210 * UBIFS_CHK_TNC: check TNC
@@ -233,7 +225,7 @@ enum {
233}; 225};
234 226
235/* 227/*
236 * Special testing flags (must match tst_names in debug.c). 228 * Special testing flags.
237 * 229 *
238 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method 230 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
239 * UBIFS_TST_RCVRY: failure mode for recovery testing 231 * UBIFS_TST_RCVRY: failure mode for recovery testing
@@ -243,22 +235,6 @@ enum {
243 UBIFS_TST_RCVRY = 0x4, 235 UBIFS_TST_RCVRY = 0x4,
244}; 236};
245 237
246#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
247#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
248#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
249#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
250#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
251#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
252#else
253#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
254#endif
255
256#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
257#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
258#else
259#define UBIFS_CHK_FLAGS_DEFAULT 0
260#endif
261
262extern spinlock_t dbg_lock; 238extern spinlock_t dbg_lock;
263 239
264extern unsigned int ubifs_msg_flags; 240extern unsigned int ubifs_msg_flags;
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 14f64b689d7f..7217d67a80a6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index d82173182eeb..dfd168b7807e 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -31,6 +31,26 @@
31 * buffer is full or when it is not used for some time (by timer). This is 31 * buffer is full or when it is not used for some time (by timer). This is
32 * similar to the mechanism is used by JFFS2. 32 * similar to the mechanism is used by JFFS2.
33 * 33 *
34 * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
35 * write size (@c->max_write_size). The latter is the maximum amount of bytes
36 * the underlying flash is able to program at a time, and writing in
37 * @c->max_write_size units should presumably be faster. Obviously,
38 * @c->min_io_size <= @c->max_write_size. Write-buffers are of
39 * @c->max_write_size bytes in size for maximum performance. However, when a
40 * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
41 * boundary) which contains data is written, not the whole write-buffer,
42 * because this is more space-efficient.
43 *
44 * This optimization adds a few complications to the code. Indeed, on the one
45 * hand, we want to write in optimal @c->max_write_size bytes chunks, which
46 * also means aligning writes at the @c->max_write_size bytes offsets. On the
47 * other hand, we do not want to waste space when synchronizing the write
48 * buffer, so during synchronization we writes in smaller chunks. And this makes
49 * the next write offset to be not aligned to @c->max_write_size bytes. So the
50 * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
51 * to @c->max_write_size bytes again. We do this by temporarily shrinking
52 * write-buffer size (@wbuf->size).
53 *
34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by 54 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
35 * mutexes defined inside these objects. Since sometimes upper-level code 55 * mutexes defined inside these objects. Since sometimes upper-level code
36 * has to lock the write-buffer (e.g. journal space reservation code), many 56 * has to lock the write-buffer (e.g. journal space reservation code), many
@@ -46,8 +66,8 @@
46 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it 66 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
47 * uses padding nodes or padding bytes, if the padding node does not fit. 67 * uses padding nodes or padding bytes, if the padding node does not fit.
48 * 68 *
49 * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes 69 * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
50 * every time they are read from the flash media. 70 * they are read from the flash media.
51 */ 71 */
52 72
53#include <linux/crc32.h> 73#include <linux/crc32.h>
@@ -88,8 +108,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
88 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is 108 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
89 * true, which is controlled by corresponding UBIFS mount option. However, if 109 * true, which is controlled by corresponding UBIFS mount option. However, if
90 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is 110 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
91 * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is 111 * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
92 * ignored and CRC is checked. 112 * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
113 * is checked. This is because during mounting or re-mounting from R/O mode to
114 * R/W mode we may read journal nodes (when replaying the journal or doing the
115 * recovery) and the journal nodes may potentially be corrupted, so checking is
116 * required.
93 * 117 *
94 * This function returns zero in case of success and %-EUCLEAN in case of bad 118 * This function returns zero in case of success and %-EUCLEAN in case of bad
95 * CRC or magic. 119 * CRC or magic.
@@ -131,8 +155,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
131 node_len > c->ranges[type].max_len) 155 node_len > c->ranges[type].max_len)
132 goto out_len; 156 goto out_len;
133 157
134 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && 158 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
135 c->no_chk_data_crc) 159 !c->remounting_rw && c->no_chk_data_crc)
136 return 0; 160 return 0;
137 161
138 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 162 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
@@ -343,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
343 * 367 *
344 * This function synchronizes write-buffer @buf and returns zero in case of 368 * This function synchronizes write-buffer @buf and returns zero in case of
345 * success or a negative error code in case of failure. 369 * success or a negative error code in case of failure.
370 *
371 * Note, although write-buffers are of @c->max_write_size, this function does
372 * not necessarily write all @c->max_write_size bytes to the flash. Instead,
373 * if the write-buffer is only partially filled with data, only the used part
374 * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
375 * This way we waste less space.
346 */ 376 */
347int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) 377int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
348{ 378{
349 struct ubifs_info *c = wbuf->c; 379 struct ubifs_info *c = wbuf->c;
350 int err, dirt; 380 int err, dirt, sync_len;
351 381
352 cancel_wbuf_timer_nolock(wbuf); 382 cancel_wbuf_timer_nolock(wbuf);
353 if (!wbuf->used || wbuf->lnum == -1) 383 if (!wbuf->used || wbuf->lnum == -1)
@@ -357,27 +387,53 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
357 dbg_io("LEB %d:%d, %d bytes, jhead %s", 387 dbg_io("LEB %d:%d, %d bytes, jhead %s",
358 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); 388 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
359 ubifs_assert(!(wbuf->avail & 7)); 389 ubifs_assert(!(wbuf->avail & 7));
360 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); 390 ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
391 ubifs_assert(wbuf->size >= c->min_io_size);
392 ubifs_assert(wbuf->size <= c->max_write_size);
393 ubifs_assert(wbuf->size % c->min_io_size == 0);
361 ubifs_assert(!c->ro_media && !c->ro_mount); 394 ubifs_assert(!c->ro_media && !c->ro_mount);
395 if (c->leb_size - wbuf->offs >= c->max_write_size)
396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
362 397
363 if (c->ro_error) 398 if (c->ro_error)
364 return -EROFS; 399 return -EROFS;
365 400
366 ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); 401 /*
402 * Do not write whole write buffer but write only the minimum necessary
403 * amount of min. I/O units.
404 */
405 sync_len = ALIGN(wbuf->used, c->min_io_size);
406 dirt = sync_len - wbuf->used;
407 if (dirt)
408 ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
367 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 409 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
368 c->min_io_size, wbuf->dtype); 410 sync_len, wbuf->dtype);
369 if (err) { 411 if (err) {
370 ubifs_err("cannot write %d bytes to LEB %d:%d", 412 ubifs_err("cannot write %d bytes to LEB %d:%d",
371 c->min_io_size, wbuf->lnum, wbuf->offs); 413 sync_len, wbuf->lnum, wbuf->offs);
372 dbg_dump_stack(); 414 dbg_dump_stack();
373 return err; 415 return err;
374 } 416 }
375 417
376 dirt = wbuf->avail;
377
378 spin_lock(&wbuf->lock); 418 spin_lock(&wbuf->lock);
379 wbuf->offs += c->min_io_size; 419 wbuf->offs += sync_len;
380 wbuf->avail = c->min_io_size; 420 /*
421 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
422 * But our goal is to optimize writes and make sure we write in
423 * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
424 * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
425 * sure that @wbuf->offs + @wbuf->size is aligned to
426 * @c->max_write_size. This way we make sure that after next
427 * write-buffer flush we are again at the optimal offset (aligned to
428 * @c->max_write_size).
429 */
430 if (c->leb_size - wbuf->offs < c->max_write_size)
431 wbuf->size = c->leb_size - wbuf->offs;
432 else if (wbuf->offs & (c->max_write_size - 1))
433 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
434 else
435 wbuf->size = c->max_write_size;
436 wbuf->avail = wbuf->size;
381 wbuf->used = 0; 437 wbuf->used = 0;
382 wbuf->next_ino = 0; 438 wbuf->next_ino = 0;
383 spin_unlock(&wbuf->lock); 439 spin_unlock(&wbuf->lock);
@@ -420,7 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
420 spin_lock(&wbuf->lock); 476 spin_lock(&wbuf->lock);
421 wbuf->lnum = lnum; 477 wbuf->lnum = lnum;
422 wbuf->offs = offs; 478 wbuf->offs = offs;
423 wbuf->avail = c->min_io_size; 479 if (c->leb_size - wbuf->offs < c->max_write_size)
480 wbuf->size = c->leb_size - wbuf->offs;
481 else if (wbuf->offs & (c->max_write_size - 1))
482 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
483 else
484 wbuf->size = c->max_write_size;
485 wbuf->avail = wbuf->size;
424 wbuf->used = 0; 486 wbuf->used = 0;
425 spin_unlock(&wbuf->lock); 487 spin_unlock(&wbuf->lock);
426 wbuf->dtype = dtype; 488 wbuf->dtype = dtype;
@@ -500,8 +562,9 @@ out_timers:
500 * 562 *
501 * This function writes data to flash via write-buffer @wbuf. This means that 563 * This function writes data to flash via write-buffer @wbuf. This means that
502 * the last piece of the node won't reach the flash media immediately if it 564 * the last piece of the node won't reach the flash media immediately if it
503 * does not take whole minimal I/O unit. Instead, the node will sit in RAM 565 * does not take whole max. write unit (@c->max_write_size). Instead, the node
504 * until the write-buffer is synchronized (e.g., by timer). 566 * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
567 * because more data are appended to the write-buffer).
505 * 568 *
506 * This function returns zero in case of success and a negative error code in 569 * This function returns zero in case of success and a negative error code in
507 * case of failure. If the node cannot be written because there is no more 570 * case of failure. If the node cannot be written because there is no more
@@ -518,9 +581,14 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
518 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 581 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
519 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 582 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
520 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 583 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
521 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); 584 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
585 ubifs_assert(wbuf->size >= c->min_io_size);
586 ubifs_assert(wbuf->size <= c->max_write_size);
587 ubifs_assert(wbuf->size % c->min_io_size == 0);
522 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 588 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
523 ubifs_assert(!c->ro_media && !c->ro_mount); 589 ubifs_assert(!c->ro_media && !c->ro_mount);
590 if (c->leb_size - wbuf->offs >= c->max_write_size)
591 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
524 592
525 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 593 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
526 err = -ENOSPC; 594 err = -ENOSPC;
@@ -543,14 +611,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
543 dbg_io("flush jhead %s wbuf to LEB %d:%d", 611 dbg_io("flush jhead %s wbuf to LEB %d:%d",
544 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 612 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
545 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 613 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
546 wbuf->offs, c->min_io_size, 614 wbuf->offs, wbuf->size,
547 wbuf->dtype); 615 wbuf->dtype);
548 if (err) 616 if (err)
549 goto out; 617 goto out;
550 618
551 spin_lock(&wbuf->lock); 619 spin_lock(&wbuf->lock);
552 wbuf->offs += c->min_io_size; 620 wbuf->offs += wbuf->size;
553 wbuf->avail = c->min_io_size; 621 if (c->leb_size - wbuf->offs >= c->max_write_size)
622 wbuf->size = c->max_write_size;
623 else
624 wbuf->size = c->leb_size - wbuf->offs;
625 wbuf->avail = wbuf->size;
554 wbuf->used = 0; 626 wbuf->used = 0;
555 wbuf->next_ino = 0; 627 wbuf->next_ino = 0;
556 spin_unlock(&wbuf->lock); 628 spin_unlock(&wbuf->lock);
@@ -564,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
564 goto exit; 636 goto exit;
565 } 637 }
566 638
567 /* 639 offs = wbuf->offs;
568 * The node is large enough and does not fit entirely within current 640 written = 0;
569 * minimal I/O unit. We have to fill and flush write-buffer and switch
570 * to the next min. I/O unit.
571 */
572 dbg_io("flush jhead %s wbuf to LEB %d:%d",
573 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
574 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
575 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
576 c->min_io_size, wbuf->dtype);
577 if (err)
578 goto out;
579 641
580 offs = wbuf->offs + c->min_io_size; 642 if (wbuf->used) {
581 len -= wbuf->avail; 643 /*
582 aligned_len -= wbuf->avail; 644 * The node is large enough and does not fit entirely within
583 written = wbuf->avail; 645 * current available space. We have to fill and flush
646 * write-buffer and switch to the next max. write unit.
647 */
648 dbg_io("flush jhead %s wbuf to LEB %d:%d",
649 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
650 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
651 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
652 wbuf->size, wbuf->dtype);
653 if (err)
654 goto out;
655
656 offs += wbuf->size;
657 len -= wbuf->avail;
658 aligned_len -= wbuf->avail;
659 written += wbuf->avail;
660 } else if (wbuf->offs & (c->max_write_size - 1)) {
661 /*
662 * The write-buffer offset is not aligned to
663 * @c->max_write_size and @wbuf->size is less than
664 * @c->max_write_size. Write @wbuf->size bytes to make sure the
665 * following writes are done in optimal @c->max_write_size
666 * chunks.
667 */
668 dbg_io("write %d bytes to LEB %d:%d",
669 wbuf->size, wbuf->lnum, wbuf->offs);
670 err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
671 wbuf->size, wbuf->dtype);
672 if (err)
673 goto out;
674
675 offs += wbuf->size;
676 len -= wbuf->size;
677 aligned_len -= wbuf->size;
678 written += wbuf->size;
679 }
584 680
585 /* 681 /*
586 * The remaining data may take more whole min. I/O units, so write the 682 * The remaining data may take more whole max. write units, so write the
587 * remains multiple to min. I/O unit size directly to the flash media. 683 * remains multiple to max. write unit size directly to the flash media.
588 * We align node length to 8-byte boundary because we anyway flash wbuf 684 * We align node length to 8-byte boundary because we anyway flash wbuf
589 * if the remaining space is less than 8 bytes. 685 * if the remaining space is less than 8 bytes.
590 */ 686 */
591 n = aligned_len >> c->min_io_shift; 687 n = aligned_len >> c->max_write_shift;
592 if (n) { 688 if (n) {
593 n <<= c->min_io_shift; 689 n <<= c->max_write_shift;
594 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 690 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
595 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 691 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
596 wbuf->dtype); 692 wbuf->dtype);
@@ -606,14 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
606 if (aligned_len) 702 if (aligned_len)
607 /* 703 /*
608 * And now we have what's left and what does not take whole 704 * And now we have what's left and what does not take whole
609 * min. I/O unit, so write it to the write-buffer and we are 705 * max. write unit, so write it to the write-buffer and we are
610 * done. 706 * done.
611 */ 707 */
612 memcpy(wbuf->buf, buf + written, len); 708 memcpy(wbuf->buf, buf + written, len);
613 709
614 wbuf->offs = offs; 710 wbuf->offs = offs;
711 if (c->leb_size - wbuf->offs >= c->max_write_size)
712 wbuf->size = c->max_write_size;
713 else
714 wbuf->size = c->leb_size - wbuf->offs;
715 wbuf->avail = wbuf->size - aligned_len;
615 wbuf->used = aligned_len; 716 wbuf->used = aligned_len;
616 wbuf->avail = c->min_io_size - aligned_len;
617 wbuf->next_ino = 0; 717 wbuf->next_ino = 0;
618 spin_unlock(&wbuf->lock); 718 spin_unlock(&wbuf->lock);
619 719
@@ -837,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
837{ 937{
838 size_t size; 938 size_t size;
839 939
840 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); 940 wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
841 if (!wbuf->buf) 941 if (!wbuf->buf)
842 return -ENOMEM; 942 return -ENOMEM;
843 943
844 size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); 944 size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
845 wbuf->inodes = kmalloc(size, GFP_KERNEL); 945 wbuf->inodes = kmalloc(size, GFP_KERNEL);
846 if (!wbuf->inodes) { 946 if (!wbuf->inodes) {
847 kfree(wbuf->buf); 947 kfree(wbuf->buf);
@@ -851,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
851 951
852 wbuf->used = 0; 952 wbuf->used = 0;
853 wbuf->lnum = wbuf->offs = -1; 953 wbuf->lnum = wbuf->offs = -1;
854 wbuf->avail = c->min_io_size; 954 /*
955 * If the LEB starts at the max. write size aligned address, then
956 * write-buffer size has to be set to @c->max_write_size. Otherwise,
957 * set it to something smaller so that it ends at the closest max.
958 * write size boundary.
959 */
960 size = c->max_write_size - (c->leb_start % c->max_write_size);
961 wbuf->avail = wbuf->size = size;
855 wbuf->dtype = UBI_UNKNOWN; 962 wbuf->dtype = UBI_UNKNOWN;
856 wbuf->sync_callback = NULL; 963 wbuf->sync_callback = NULL;
857 mutex_init(&wbuf->io_mutex); 964 mutex_init(&wbuf->io_mutex);
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 8aacd64957a2..548acf494afd 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -160,7 +160,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
160 if (IS_RDONLY(inode)) 160 if (IS_RDONLY(inode))
161 return -EROFS; 161 return -EROFS;
162 162
163 if (!is_owner_or_cap(inode)) 163 if (!inode_owner_or_capable(inode))
164 return -EACCES; 164 return -EACCES;
165 165
166 if (get_user(flags, (int __user *) arg)) 166 if (get_user(flags, (int __user *) arg))
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 914f1bd89e57..aed25e864227 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
690{ 690{
691 struct ubifs_data_node *data; 691 struct ubifs_data_node *data;
692 int err, lnum, offs, compr_type, out_len; 692 int err, lnum, offs, compr_type, out_len;
693 int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; 693 int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
694 struct ubifs_inode *ui = ubifs_inode(inode); 694 struct ubifs_inode *ui = ubifs_inode(inode);
695 695
696 dbg_jnl("ino %lu, blk %u, len %d, key %s", 696 dbg_jnl("ino %lu, blk %u, len %d, key %s",
@@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
698 DBGKEY(key)); 698 DBGKEY(key));
699 ubifs_assert(len <= UBIFS_BLOCK_SIZE); 699 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
700 700
701 data = kmalloc(dlen, GFP_NOFS); 701 data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
702 if (!data) 702 if (!data) {
703 return -ENOMEM; 703 /*
704 * Fall-back to the write reserve buffer. Note, we might be
705 * currently on the memory reclaim path, when the kernel is
706 * trying to free some memory by writing out dirty pages. The
707 * write reserve buffer helps us to guarantee that we are
708 * always able to write the data.
709 */
710 allocated = 0;
711 mutex_lock(&c->write_reserve_mutex);
712 data = c->write_reserve_buf;
713 }
704 714
705 data->ch.node_type = UBIFS_DATA_NODE; 715 data->ch.node_type = UBIFS_DATA_NODE;
706 key_write(c, key, &data->key); 716 key_write(c, key, &data->key);
@@ -736,7 +746,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
736 goto out_ro; 746 goto out_ro;
737 747
738 finish_reservation(c); 748 finish_reservation(c);
739 kfree(data); 749 if (!allocated)
750 mutex_unlock(&c->write_reserve_mutex);
751 else
752 kfree(data);
740 return 0; 753 return 0;
741 754
742out_release: 755out_release:
@@ -745,7 +758,10 @@ out_ro:
745 ubifs_ro_mode(c, err); 758 ubifs_ro_mode(c, err);
746 finish_reservation(c); 759 finish_reservation(c);
747out_free: 760out_free:
748 kfree(data); 761 if (!allocated)
762 mutex_unlock(&c->write_reserve_mutex);
763 else
764 kfree(data);
749 return err; 765 return err;
750} 766}
751 767
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 4d4ca388889b..c7b25e2f7764 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1035,7 +1035,8 @@ static int scan_check_cb(struct ubifs_info *c,
1035 struct ubifs_scan_leb *sleb; 1035 struct ubifs_scan_leb *sleb;
1036 struct ubifs_scan_node *snod; 1036 struct ubifs_scan_node *snod;
1037 struct ubifs_lp_stats *lst = &data->lst; 1037 struct ubifs_lp_stats *lst = &data->lst;
1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; 1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
1039 void *buf = NULL;
1039 1040
1040 cat = lp->flags & LPROPS_CAT_MASK; 1041 cat = lp->flags & LPROPS_CAT_MASK;
1041 if (cat != LPROPS_UNCAT) { 1042 if (cat != LPROPS_UNCAT) {
@@ -1093,7 +1094,13 @@ static int scan_check_cb(struct ubifs_info *c,
1093 } 1094 }
1094 } 1095 }
1095 1096
1096 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 1097 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
1098 if (!buf) {
1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum);
1100 goto out;
1101 }
1102
1103 sleb = ubifs_scan(c, lnum, 0, buf, 0);
1097 if (IS_ERR(sleb)) { 1104 if (IS_ERR(sleb)) {
1098 /* 1105 /*
1099 * After an unclean unmount, empty and freeable LEBs 1106 * After an unclean unmount, empty and freeable LEBs
@@ -1105,7 +1112,8 @@ static int scan_check_cb(struct ubifs_info *c,
1105 lst->empty_lebs += 1; 1112 lst->empty_lebs += 1;
1106 lst->total_free += c->leb_size; 1113 lst->total_free += c->leb_size;
1107 lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1114 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1108 return LPT_SCAN_CONTINUE; 1115 ret = LPT_SCAN_CONTINUE;
1116 goto exit;
1109 } 1117 }
1110 1118
1111 if (lp->free + lp->dirty == c->leb_size && 1119 if (lp->free + lp->dirty == c->leb_size &&
@@ -1115,10 +1123,12 @@ static int scan_check_cb(struct ubifs_info *c,
1115 lst->total_free += lp->free; 1123 lst->total_free += lp->free;
1116 lst->total_dirty += lp->dirty; 1124 lst->total_dirty += lp->dirty;
1117 lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1125 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1118 return LPT_SCAN_CONTINUE; 1126 ret = LPT_SCAN_CONTINUE;
1127 goto exit;
1119 } 1128 }
1120 data->err = PTR_ERR(sleb); 1129 data->err = PTR_ERR(sleb);
1121 return LPT_SCAN_STOP; 1130 ret = LPT_SCAN_STOP;
1131 goto exit;
1122 } 1132 }
1123 1133
1124 is_idx = -1; 1134 is_idx = -1;
@@ -1236,7 +1246,10 @@ static int scan_check_cb(struct ubifs_info *c,
1236 } 1246 }
1237 1247
1238 ubifs_scan_destroy(sleb); 1248 ubifs_scan_destroy(sleb);
1239 return LPT_SCAN_CONTINUE; 1249 ret = LPT_SCAN_CONTINUE;
1250exit:
1251 vfree(buf);
1252 return ret;
1240 1253
1241out_print: 1254out_print:
1242 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " 1255 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
@@ -1246,6 +1259,7 @@ out_print:
1246out_destroy: 1259out_destroy:
1247 ubifs_scan_destroy(sleb); 1260 ubifs_scan_destroy(sleb);
1248out: 1261out:
1262 vfree(buf);
1249 data->err = -EINVAL; 1263 data->err = -EINVAL;
1250 return LPT_SCAN_STOP; 1264 return LPT_SCAN_STOP;
1251} 1265}
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 5c90dec5db0b..0a3c2c3f5c4a 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1628,29 +1628,35 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1628{ 1628{
1629 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; 1629 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
1630 int ret; 1630 int ret;
1631 void *buf = c->dbg->buf; 1631 void *buf, *p;
1632 1632
1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1634 return 0; 1634 return 0;
1635 1635
1636 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
1637 if (!buf) {
1638 ubifs_err("cannot allocate memory for ltab checking");
1639 return 0;
1640 }
1641
1636 dbg_lp("LEB %d", lnum); 1642 dbg_lp("LEB %d", lnum);
1637 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1643 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1638 if (err) { 1644 if (err) {
1639 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); 1645 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
1640 return err; 1646 goto out;
1641 } 1647 }
1642 while (1) { 1648 while (1) {
1643 if (!is_a_node(c, buf, len)) { 1649 if (!is_a_node(c, p, len)) {
1644 int i, pad_len; 1650 int i, pad_len;
1645 1651
1646 pad_len = get_pad_len(c, buf, len); 1652 pad_len = get_pad_len(c, p, len);
1647 if (pad_len) { 1653 if (pad_len) {
1648 buf += pad_len; 1654 p += pad_len;
1649 len -= pad_len; 1655 len -= pad_len;
1650 dirty += pad_len; 1656 dirty += pad_len;
1651 continue; 1657 continue;
1652 } 1658 }
1653 if (!dbg_is_all_ff(buf, len)) { 1659 if (!dbg_is_all_ff(p, len)) {
1654 dbg_msg("invalid empty space in LEB %d at %d", 1660 dbg_msg("invalid empty space in LEB %d at %d",
1655 lnum, c->leb_size - len); 1661 lnum, c->leb_size - len);
1656 err = -EINVAL; 1662 err = -EINVAL;
@@ -1668,16 +1674,21 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1668 lnum, dirty, c->ltab[i].dirty); 1674 lnum, dirty, c->ltab[i].dirty);
1669 err = -EINVAL; 1675 err = -EINVAL;
1670 } 1676 }
1671 return err; 1677 goto out;
1672 } 1678 }
1673 node_type = get_lpt_node_type(c, buf, &node_num); 1679 node_type = get_lpt_node_type(c, p, &node_num);
1674 node_len = get_lpt_node_len(c, node_type); 1680 node_len = get_lpt_node_len(c, node_type);
1675 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); 1681 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
1676 if (ret == 1) 1682 if (ret == 1)
1677 dirty += node_len; 1683 dirty += node_len;
1678 buf += node_len; 1684 p += node_len;
1679 len -= node_len; 1685 len -= node_len;
1680 } 1686 }
1687
1688 err = 0;
1689out:
1690 vfree(buf);
1691 return err;
1681} 1692}
1682 1693
1683/** 1694/**
@@ -1870,25 +1881,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1870static void dump_lpt_leb(const struct ubifs_info *c, int lnum) 1881static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1871{ 1882{
1872 int err, len = c->leb_size, node_type, node_num, node_len, offs; 1883 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1873 void *buf = c->dbg->buf; 1884 void *buf, *p;
1874 1885
1875 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 1886 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
1876 current->pid, lnum); 1887 current->pid, lnum);
1888 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
1889 if (!buf) {
1890 ubifs_err("cannot allocate memory to dump LPT");
1891 return;
1892 }
1893
1877 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1894 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1878 if (err) { 1895 if (err) {
1879 ubifs_err("cannot read LEB %d, error %d", lnum, err); 1896 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1880 return; 1897 goto out;
1881 } 1898 }
1882 while (1) { 1899 while (1) {
1883 offs = c->leb_size - len; 1900 offs = c->leb_size - len;
1884 if (!is_a_node(c, buf, len)) { 1901 if (!is_a_node(c, p, len)) {
1885 int pad_len; 1902 int pad_len;
1886 1903
1887 pad_len = get_pad_len(c, buf, len); 1904 pad_len = get_pad_len(c, p, len);
1888 if (pad_len) { 1905 if (pad_len) {
1889 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", 1906 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
1890 lnum, offs, pad_len); 1907 lnum, offs, pad_len);
1891 buf += pad_len; 1908 p += pad_len;
1892 len -= pad_len; 1909 len -= pad_len;
1893 continue; 1910 continue;
1894 } 1911 }
@@ -1898,7 +1915,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1898 break; 1915 break;
1899 } 1916 }
1900 1917
1901 node_type = get_lpt_node_type(c, buf, &node_num); 1918 node_type = get_lpt_node_type(c, p, &node_num);
1902 switch (node_type) { 1919 switch (node_type) {
1903 case UBIFS_LPT_PNODE: 1920 case UBIFS_LPT_PNODE:
1904 { 1921 {
@@ -1923,7 +1940,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1923 else 1940 else
1924 printk(KERN_DEBUG "LEB %d:%d, nnode, ", 1941 printk(KERN_DEBUG "LEB %d:%d, nnode, ",
1925 lnum, offs); 1942 lnum, offs);
1926 err = ubifs_unpack_nnode(c, buf, &nnode); 1943 err = ubifs_unpack_nnode(c, p, &nnode);
1927 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 1944 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1928 printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, 1945 printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
1929 nnode.nbranch[i].offs); 1946 nnode.nbranch[i].offs);
@@ -1944,15 +1961,18 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1944 break; 1961 break;
1945 default: 1962 default:
1946 ubifs_err("LPT node type %d not recognized", node_type); 1963 ubifs_err("LPT node type %d not recognized", node_type);
1947 return; 1964 goto out;
1948 } 1965 }
1949 1966
1950 buf += node_len; 1967 p += node_len;
1951 len -= node_len; 1968 len -= node_len;
1952 } 1969 }
1953 1970
1954 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 1971 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
1955 current->pid, lnum); 1972 current->pid, lnum);
1973out:
1974 vfree(buf);
1975 return;
1956} 1976}
1957 1977
1958/** 1978/**
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 82009c74b6a3..2cdbd31641d7 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -892,15 +892,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
892static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) 892static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
893{ 893{
894 int lnum, err = 0; 894 int lnum, err = 0;
895 void *buf;
895 896
896 /* Check no-orphans flag and skip this if no orphans */ 897 /* Check no-orphans flag and skip this if no orphans */
897 if (c->no_orphs) 898 if (c->no_orphs)
898 return 0; 899 return 0;
899 900
901 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
902 if (!buf) {
903 ubifs_err("cannot allocate memory to check orphans");
904 return 0;
905 }
906
900 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { 907 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
901 struct ubifs_scan_leb *sleb; 908 struct ubifs_scan_leb *sleb;
902 909
903 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 910 sleb = ubifs_scan(c, lnum, 0, buf, 0);
904 if (IS_ERR(sleb)) { 911 if (IS_ERR(sleb)) {
905 err = PTR_ERR(sleb); 912 err = PTR_ERR(sleb);
906 break; 913 break;
@@ -912,6 +919,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
912 break; 919 break;
913 } 920 }
914 921
922 vfree(buf);
915 return err; 923 return err;
916} 924}
917 925
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 77e9b874b6c2..936f2cbfe6b6 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -28,6 +28,23 @@
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that 28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted 29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
30 * read-only, and the flash is not modified in that case. 30 * read-only, and the flash is not modified in that case.
31 *
32 * The general UBIFS approach to the recovery is that it recovers from
33 * corruptions which could be caused by power cuts, but it refuses to recover
34 * from corruption caused by other reasons. And UBIFS tries to distinguish
35 * between these 2 reasons of corruptions and silently recover in the former
36 * case and loudly complain in the latter case.
37 *
38 * UBIFS writes only to erased LEBs, so it writes only to the flash space
39 * containing only 0xFFs. UBIFS also always writes strictly from the beginning
40 * of the LEB to the end. And UBIFS assumes that the underlying flash media
41 * writes in @c->max_write_size bytes at a time.
42 *
43 * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
44 * I/O unit corresponding to offset X to contain corrupted data, all the
45 * following min. I/O units have to contain empty space (all 0xFFs). If this is
46 * not true, the corruption cannot be the result of a power cut, and UBIFS
47 * refuses to mount.
31 */ 48 */
32 49
33#include <linux/crc32.h> 50#include <linux/crc32.h>
@@ -362,8 +379,9 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
362 * @offs: offset to check 379 * @offs: offset to check
363 * 380 *
364 * This function returns %1 if @offs was in the last write to the LEB whose data 381 * This function returns %1 if @offs was in the last write to the LEB whose data
365 * is in @buf, otherwise %0 is returned. The determination is made by checking 382 * is in @buf, otherwise %0 is returned. The determination is made by checking
366 * for subsequent empty space starting from the next @c->min_io_size boundary. 383 * for subsequent empty space starting from the next @c->max_write_size
384 * boundary.
367 */ 385 */
368static int is_last_write(const struct ubifs_info *c, void *buf, int offs) 386static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
369{ 387{
@@ -371,10 +389,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
371 uint8_t *p; 389 uint8_t *p;
372 390
373 /* 391 /*
374 * Round up to the next @c->min_io_size boundary i.e. @offs is in the 392 * Round up to the next @c->max_write_size boundary i.e. @offs is in
375 * last wbuf written. After that should be empty space. 393 * the last wbuf written. After that should be empty space.
376 */ 394 */
377 empty_offs = ALIGN(offs + 1, c->min_io_size); 395 empty_offs = ALIGN(offs + 1, c->max_write_size);
378 check_len = c->leb_size - empty_offs; 396 check_len = c->leb_size - empty_offs;
379 p = buf + empty_offs - offs; 397 p = buf + empty_offs - offs;
380 return is_empty(p, check_len); 398 return is_empty(p, check_len);
@@ -429,7 +447,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
429 int skip, dlen = le32_to_cpu(ch->len); 447 int skip, dlen = le32_to_cpu(ch->len);
430 448
431 /* Check for empty space after the corrupt node's common header */ 449 /* Check for empty space after the corrupt node's common header */
432 skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; 450 skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
433 if (is_empty(buf + skip, len - skip)) 451 if (is_empty(buf + skip, len - skip))
434 return 1; 452 return 1;
435 /* 453 /*
@@ -441,7 +459,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
441 return 0; 459 return 0;
442 } 460 }
443 /* Now we know the corrupt node's length we can skip over it */ 461 /* Now we know the corrupt node's length we can skip over it */
444 skip = ALIGN(offs + dlen, c->min_io_size) - offs; 462 skip = ALIGN(offs + dlen, c->max_write_size) - offs;
445 /* After which there should be empty space */ 463 /* After which there should be empty space */
446 if (is_empty(buf + skip, len - skip)) 464 if (is_empty(buf + skip, len - skip))
447 return 1; 465 return 1;
@@ -671,10 +689,14 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
671 } else { 689 } else {
672 int corruption = first_non_ff(buf, len); 690 int corruption = first_non_ff(buf, len);
673 691
692 /*
693 * See header comment for this file for more
694 * explanations about the reasons we have this check.
695 */
674 ubifs_err("corrupt empty space LEB %d:%d, corruption " 696 ubifs_err("corrupt empty space LEB %d:%d, corruption "
675 "starts at %d", lnum, offs, corruption); 697 "starts at %d", lnum, offs, corruption);
676 /* Make sure we dump interesting non-0xFF data */ 698 /* Make sure we dump interesting non-0xFF data */
677 offs = corruption; 699 offs += corruption;
678 buf += corruption; 700 buf += corruption;
679 goto corrupted; 701 goto corrupted;
680 } 702 }
@@ -836,12 +858,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
836static int recover_head(const struct ubifs_info *c, int lnum, int offs, 858static int recover_head(const struct ubifs_info *c, int lnum, int offs,
837 void *sbuf) 859 void *sbuf)
838{ 860{
839 int len, err; 861 int len = c->max_write_size, err;
840 862
841 if (c->min_io_size > 1)
842 len = c->min_io_size;
843 else
844 len = 512;
845 if (offs + len > c->leb_size) 863 if (offs + len > c->leb_size)
846 len = c->leb_size - offs; 864 len = c->leb_size - offs;
847 865
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 3e1ee57dbeaa..36216b46f772 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -328,7 +328,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
328 if (!quiet) 328 if (!quiet)
329 ubifs_err("empty space starts at non-aligned offset %d", 329 ubifs_err("empty space starts at non-aligned offset %d",
330 offs); 330 offs);
331 goto corrupted;; 331 goto corrupted;
332 } 332 }
333 333
334 ubifs_end_scan(c, sleb, lnum, offs); 334 ubifs_end_scan(c, sleb, lnum, offs);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 6e11c2975dcf..e5dc1e120e8d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -512,9 +512,12 @@ static int init_constants_early(struct ubifs_info *c)
512 512
513 c->leb_cnt = c->vi.size; 513 c->leb_cnt = c->vi.size;
514 c->leb_size = c->vi.usable_leb_size; 514 c->leb_size = c->vi.usable_leb_size;
515 c->leb_start = c->di.leb_start;
515 c->half_leb_size = c->leb_size / 2; 516 c->half_leb_size = c->leb_size / 2;
516 c->min_io_size = c->di.min_io_size; 517 c->min_io_size = c->di.min_io_size;
517 c->min_io_shift = fls(c->min_io_size) - 1; 518 c->min_io_shift = fls(c->min_io_size) - 1;
519 c->max_write_size = c->di.max_write_size;
520 c->max_write_shift = fls(c->max_write_size) - 1;
518 521
519 if (c->leb_size < UBIFS_MIN_LEB_SZ) { 522 if (c->leb_size < UBIFS_MIN_LEB_SZ) {
520 ubifs_err("too small LEBs (%d bytes), min. is %d bytes", 523 ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
@@ -534,6 +537,18 @@ static int init_constants_early(struct ubifs_info *c)
534 } 537 }
535 538
536 /* 539 /*
540 * Maximum write size has to be greater than or equal to min. I/O
541 * size, and be a multiple of min. I/O size.
542 */
543 if (c->max_write_size < c->min_io_size ||
544 c->max_write_size % c->min_io_size ||
545 !is_power_of_2(c->max_write_size)) {
546 ubifs_err("bad write buffer size %d for %d min. I/O unit",
547 c->max_write_size, c->min_io_size);
548 return -EINVAL;
549 }
550
551 /*
537 * UBIFS aligns all node to 8-byte boundary, so to make function in 552 * UBIFS aligns all node to 8-byte boundary, so to make function in
538 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is 553 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
539 * less than 8. 554 * less than 8.
@@ -541,6 +556,10 @@ static int init_constants_early(struct ubifs_info *c)
541 if (c->min_io_size < 8) { 556 if (c->min_io_size < 8) {
542 c->min_io_size = 8; 557 c->min_io_size = 8;
543 c->min_io_shift = 3; 558 c->min_io_shift = 3;
559 if (c->max_write_size < c->min_io_size) {
560 c->max_write_size = c->min_io_size;
561 c->max_write_shift = c->min_io_shift;
562 }
544 } 563 }
545 564
546 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); 565 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
@@ -1202,11 +1221,14 @@ static int mount_ubifs(struct ubifs_info *c)
1202 if (c->bulk_read == 1) 1221 if (c->bulk_read == 1)
1203 bu_init(c); 1222 bu_init(c);
1204 1223
1205 /* 1224 if (!c->ro_mount) {
1206 * We have to check all CRCs, even for data nodes, when we mount the FS 1225 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
1207 * (specifically, when we are replaying). 1226 GFP_KERNEL);
1208 */ 1227 if (!c->write_reserve_buf)
1209 c->always_chk_crc = 1; 1228 goto out_free;
1229 }
1230
1231 c->mounting = 1;
1210 1232
1211 err = ubifs_read_superblock(c); 1233 err = ubifs_read_superblock(c);
1212 if (err) 1234 if (err)
@@ -1382,7 +1404,7 @@ static int mount_ubifs(struct ubifs_info *c)
1382 if (err) 1404 if (err)
1383 goto out_infos; 1405 goto out_infos;
1384 1406
1385 c->always_chk_crc = 0; 1407 c->mounting = 0;
1386 1408
1387 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", 1409 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
1388 c->vi.ubi_num, c->vi.vol_id, c->vi.name); 1410 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
@@ -1403,6 +1425,7 @@ static int mount_ubifs(struct ubifs_info *c)
1403 1425
1404 dbg_msg("compiled on: " __DATE__ " at " __TIME__); 1426 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1405 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); 1427 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1428 dbg_msg("max. write size: %d bytes", c->max_write_size);
1406 dbg_msg("LEB size: %d bytes (%d KiB)", 1429 dbg_msg("LEB size: %d bytes (%d KiB)",
1407 c->leb_size, c->leb_size >> 10); 1430 c->leb_size, c->leb_size >> 10);
1408 dbg_msg("data journal heads: %d", 1431 dbg_msg("data journal heads: %d",
@@ -1432,9 +1455,9 @@ static int mount_ubifs(struct ubifs_info *c)
1432 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); 1455 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
1433 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1456 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1434 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1457 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1435 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", 1458 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
1436 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1459 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1437 UBIFS_MAX_DENT_NODE_SZ); 1460 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
1438 dbg_msg("dead watermark: %d", c->dead_wm); 1461 dbg_msg("dead watermark: %d", c->dead_wm);
1439 dbg_msg("dark watermark: %d", c->dark_wm); 1462 dbg_msg("dark watermark: %d", c->dark_wm);
1440 dbg_msg("LEB overhead: %d", c->leb_overhead); 1463 dbg_msg("LEB overhead: %d", c->leb_overhead);
@@ -1474,6 +1497,7 @@ out_wbufs:
1474out_cbuf: 1497out_cbuf:
1475 kfree(c->cbuf); 1498 kfree(c->cbuf);
1476out_free: 1499out_free:
1500 kfree(c->write_reserve_buf);
1477 kfree(c->bu.buf); 1501 kfree(c->bu.buf);
1478 vfree(c->ileb_buf); 1502 vfree(c->ileb_buf);
1479 vfree(c->sbuf); 1503 vfree(c->sbuf);
@@ -1512,6 +1536,7 @@ static void ubifs_umount(struct ubifs_info *c)
1512 kfree(c->cbuf); 1536 kfree(c->cbuf);
1513 kfree(c->rcvrd_mst_node); 1537 kfree(c->rcvrd_mst_node);
1514 kfree(c->mst_node); 1538 kfree(c->mst_node);
1539 kfree(c->write_reserve_buf);
1515 kfree(c->bu.buf); 1540 kfree(c->bu.buf);
1516 vfree(c->ileb_buf); 1541 vfree(c->ileb_buf);
1517 vfree(c->sbuf); 1542 vfree(c->sbuf);
@@ -1543,7 +1568,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1543 mutex_lock(&c->umount_mutex); 1568 mutex_lock(&c->umount_mutex);
1544 dbg_save_space_info(c); 1569 dbg_save_space_info(c);
1545 c->remounting_rw = 1; 1570 c->remounting_rw = 1;
1546 c->always_chk_crc = 1;
1547 1571
1548 err = check_free_space(c); 1572 err = check_free_space(c);
1549 if (err) 1573 if (err)
@@ -1598,6 +1622,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1598 goto out; 1622 goto out;
1599 } 1623 }
1600 1624
1625 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
1626 if (!c->write_reserve_buf)
1627 goto out;
1628
1601 err = ubifs_lpt_init(c, 0, 1); 1629 err = ubifs_lpt_init(c, 0, 1);
1602 if (err) 1630 if (err)
1603 goto out; 1631 goto out;
@@ -1650,7 +1678,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1650 dbg_gen("re-mounted read-write"); 1678 dbg_gen("re-mounted read-write");
1651 c->ro_mount = 0; 1679 c->ro_mount = 0;
1652 c->remounting_rw = 0; 1680 c->remounting_rw = 0;
1653 c->always_chk_crc = 0;
1654 err = dbg_check_space_info(c); 1681 err = dbg_check_space_info(c);
1655 mutex_unlock(&c->umount_mutex); 1682 mutex_unlock(&c->umount_mutex);
1656 return err; 1683 return err;
@@ -1663,11 +1690,12 @@ out:
1663 c->bgt = NULL; 1690 c->bgt = NULL;
1664 } 1691 }
1665 free_wbufs(c); 1692 free_wbufs(c);
1693 kfree(c->write_reserve_buf);
1694 c->write_reserve_buf = NULL;
1666 vfree(c->ileb_buf); 1695 vfree(c->ileb_buf);
1667 c->ileb_buf = NULL; 1696 c->ileb_buf = NULL;
1668 ubifs_lpt_free(c, 1); 1697 ubifs_lpt_free(c, 1);
1669 c->remounting_rw = 0; 1698 c->remounting_rw = 0;
1670 c->always_chk_crc = 0;
1671 mutex_unlock(&c->umount_mutex); 1699 mutex_unlock(&c->umount_mutex);
1672 return err; 1700 return err;
1673} 1701}
@@ -1707,6 +1735,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1707 free_wbufs(c); 1735 free_wbufs(c);
1708 vfree(c->orph_buf); 1736 vfree(c->orph_buf);
1709 c->orph_buf = NULL; 1737 c->orph_buf = NULL;
1738 kfree(c->write_reserve_buf);
1739 c->write_reserve_buf = NULL;
1710 vfree(c->ileb_buf); 1740 vfree(c->ileb_buf);
1711 c->ileb_buf = NULL; 1741 c->ileb_buf = NULL;
1712 ubifs_lpt_free(c, 1); 1742 ubifs_lpt_free(c, 1);
@@ -1937,6 +1967,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1937 mutex_init(&c->mst_mutex); 1967 mutex_init(&c->mst_mutex);
1938 mutex_init(&c->umount_mutex); 1968 mutex_init(&c->umount_mutex);
1939 mutex_init(&c->bu_mutex); 1969 mutex_init(&c->bu_mutex);
1970 mutex_init(&c->write_reserve_mutex);
1940 init_waitqueue_head(&c->cmt_wq); 1971 init_waitqueue_head(&c->cmt_wq);
1941 c->buds = RB_ROOT; 1972 c->buds = RB_ROOT;
1942 c->old_idx = RB_ROOT; 1973 c->old_idx = RB_ROOT;
@@ -1954,6 +1985,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1954 INIT_LIST_HEAD(&c->old_buds); 1985 INIT_LIST_HEAD(&c->old_buds);
1955 INIT_LIST_HEAD(&c->orph_list); 1986 INIT_LIST_HEAD(&c->orph_list);
1956 INIT_LIST_HEAD(&c->orph_new); 1987 INIT_LIST_HEAD(&c->orph_new);
1988 c->no_chk_data_crc = 1;
1957 1989
1958 c->vfs_sb = sb; 1990 c->vfs_sb = sb;
1959 c->highest_inum = UBIFS_FIRST_INO; 1991 c->highest_inum = UBIFS_FIRST_INO;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index ad9cf0133622..de485979ca39 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -447,8 +447,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
447 * 447 *
448 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc 448 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
449 * is true (it is controlled by corresponding mount option). However, if 449 * is true (it is controlled by corresponding mount option). However, if
450 * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always 450 * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
451 * checked. 451 * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
452 * because during mounting or re-mounting from R/O mode to R/W mode we may read
453 * journal nodes (when replaying the journal or doing the recovery) and the
454 * journal nodes may potentially be corrupted, so checking is required.
452 */ 455 */
453static int try_read_node(const struct ubifs_info *c, void *buf, int type, 456static int try_read_node(const struct ubifs_info *c, void *buf, int type,
454 int len, int lnum, int offs) 457 int len, int lnum, int offs)
@@ -476,7 +479,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
476 if (node_len != len) 479 if (node_len != len)
477 return 0; 480 return 0;
478 481
479 if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) 482 if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
483 !c->remounting_rw)
480 return 1; 484 return 1;
481 485
482 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 486 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 381d6b207a52..8c40ad3c6721 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -151,6 +151,12 @@
151 */ 151 */
152#define WORST_COMPR_FACTOR 2 152#define WORST_COMPR_FACTOR 2
153 153
154/*
155 * How much memory is needed for a buffer where we compress a data node.
156 */
157#define COMPRESSED_DATA_NODE_BUF_SZ \
158 (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
159
154/* Maximum expected tree height for use by bottom_up_buf */ 160/* Maximum expected tree height for use by bottom_up_buf */
155#define BOTTOM_UP_HEIGHT 64 161#define BOTTOM_UP_HEIGHT 64
156 162
@@ -646,6 +652,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
646 * @offs: write-buffer offset in this logical eraseblock 652 * @offs: write-buffer offset in this logical eraseblock
647 * @avail: number of bytes available in the write-buffer 653 * @avail: number of bytes available in the write-buffer
648 * @used: number of used bytes in the write-buffer 654 * @used: number of used bytes in the write-buffer
655 * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
649 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, 656 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
650 * %UBI_UNKNOWN) 657 * %UBI_UNKNOWN)
651 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep 658 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
@@ -680,6 +687,7 @@ struct ubifs_wbuf {
680 int offs; 687 int offs;
681 int avail; 688 int avail;
682 int used; 689 int used;
690 int size;
683 int dtype; 691 int dtype;
684 int jhead; 692 int jhead;
685 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); 693 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
@@ -1003,6 +1011,11 @@ struct ubifs_debug_info;
1003 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu 1011 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
1004 * @bu: pre-allocated bulk-read information 1012 * @bu: pre-allocated bulk-read information
1005 * 1013 *
1014 * @write_reserve_mutex: protects @write_reserve_buf
1015 * @write_reserve_buf: on the write path we allocate memory, which might
1016 * sometimes be unavailable, in which case we use this
1017 * write reserve buffer
1018 *
1006 * @log_lebs: number of logical eraseblocks in the log 1019 * @log_lebs: number of logical eraseblocks in the log
1007 * @log_bytes: log size in bytes 1020 * @log_bytes: log size in bytes
1008 * @log_last: last LEB of the log 1021 * @log_last: last LEB of the log
@@ -1024,7 +1037,12 @@ struct ubifs_debug_info;
1024 * 1037 *
1025 * @min_io_size: minimal input/output unit size 1038 * @min_io_size: minimal input/output unit size
1026 * @min_io_shift: number of bits in @min_io_size minus one 1039 * @min_io_shift: number of bits in @min_io_size minus one
1040 * @max_write_size: maximum amount of bytes the underlying flash can write at a
1041 * time (MTD write buffer size)
1042 * @max_write_shift: number of bits in @max_write_size minus one
1027 * @leb_size: logical eraseblock size in bytes 1043 * @leb_size: logical eraseblock size in bytes
1044 * @leb_start: starting offset of logical eraseblocks within physical
1045 * eraseblocks
1028 * @half_leb_size: half LEB size 1046 * @half_leb_size: half LEB size
1029 * @idx_leb_size: how many bytes of an LEB are effectively available when it is 1047 * @idx_leb_size: how many bytes of an LEB are effectively available when it is
1030 * used to store indexing nodes (@leb_size - @max_idx_node_sz) 1048 * used to store indexing nodes (@leb_size - @max_idx_node_sz)
@@ -1166,22 +1184,21 @@ struct ubifs_debug_info;
1166 * @rp_uid: reserved pool user ID 1184 * @rp_uid: reserved pool user ID
1167 * @rp_gid: reserved pool group ID 1185 * @rp_gid: reserved pool group ID
1168 * 1186 *
1169 * @empty: if the UBI device is empty 1187 * @empty: %1 if the UBI device is empty
1188 * @need_recovery: %1 if the file-system needs recovery
1189 * @replaying: %1 during journal replay
1190 * @mounting: %1 while mounting
1191 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
1170 * @replay_tree: temporary tree used during journal replay 1192 * @replay_tree: temporary tree used during journal replay
1171 * @replay_list: temporary list used during journal replay 1193 * @replay_list: temporary list used during journal replay
1172 * @replay_buds: list of buds to replay 1194 * @replay_buds: list of buds to replay
1173 * @cs_sqnum: sequence number of first node in the log (commit start node) 1195 * @cs_sqnum: sequence number of first node in the log (commit start node)
1174 * @replay_sqnum: sequence number of node currently being replayed 1196 * @replay_sqnum: sequence number of node currently being replayed
1175 * @need_recovery: file-system needs recovery
1176 * @replaying: set to %1 during journal replay
1177 * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W 1197 * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
1178 * mode 1198 * mode
1179 * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted 1199 * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
1180 * FS to R/W mode 1200 * FS to R/W mode
1181 * @size_tree: inode size information for recovery 1201 * @size_tree: inode size information for recovery
1182 * @remounting_rw: set while re-mounting from R/O mode to R/W mode
1183 * @always_chk_crc: always check CRCs (while mounting and remounting to R/W
1184 * mode)
1185 * @mount_opts: UBIFS-specific mount options 1202 * @mount_opts: UBIFS-specific mount options
1186 * 1203 *
1187 * @dbg: debugging-related information 1204 * @dbg: debugging-related information
@@ -1250,6 +1267,9 @@ struct ubifs_info {
1250 struct mutex bu_mutex; 1267 struct mutex bu_mutex;
1251 struct bu_info bu; 1268 struct bu_info bu;
1252 1269
1270 struct mutex write_reserve_mutex;
1271 void *write_reserve_buf;
1272
1253 int log_lebs; 1273 int log_lebs;
1254 long long log_bytes; 1274 long long log_bytes;
1255 int log_last; 1275 int log_last;
@@ -1271,7 +1291,10 @@ struct ubifs_info {
1271 1291
1272 int min_io_size; 1292 int min_io_size;
1273 int min_io_shift; 1293 int min_io_shift;
1294 int max_write_size;
1295 int max_write_shift;
1274 int leb_size; 1296 int leb_size;
1297 int leb_start;
1275 int half_leb_size; 1298 int half_leb_size;
1276 int idx_leb_size; 1299 int idx_leb_size;
1277 int leb_cnt; 1300 int leb_cnt;
@@ -1402,19 +1425,19 @@ struct ubifs_info {
1402 gid_t rp_gid; 1425 gid_t rp_gid;
1403 1426
1404 /* The below fields are used only during mounting and re-mounting */ 1427 /* The below fields are used only during mounting and re-mounting */
1405 int empty; 1428 unsigned int empty:1;
1429 unsigned int need_recovery:1;
1430 unsigned int replaying:1;
1431 unsigned int mounting:1;
1432 unsigned int remounting_rw:1;
1406 struct rb_root replay_tree; 1433 struct rb_root replay_tree;
1407 struct list_head replay_list; 1434 struct list_head replay_list;
1408 struct list_head replay_buds; 1435 struct list_head replay_buds;
1409 unsigned long long cs_sqnum; 1436 unsigned long long cs_sqnum;
1410 unsigned long long replay_sqnum; 1437 unsigned long long replay_sqnum;
1411 int need_recovery;
1412 int replaying;
1413 struct list_head unclean_leb_list; 1438 struct list_head unclean_leb_list;
1414 struct ubifs_mst_node *rcvrd_mst_node; 1439 struct ubifs_mst_node *rcvrd_mst_node;
1415 struct rb_root size_tree; 1440 struct rb_root size_tree;
1416 int remounting_rw;
1417 int always_chk_crc;
1418 struct ubifs_mount_opts mount_opts; 1441 struct ubifs_mount_opts mount_opts;
1419 1442
1420#ifdef CONFIG_UBIFS_FS_DEBUG 1443#ifdef CONFIG_UBIFS_FS_DEBUG
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index 306ee39ef2c3..95518a9f589e 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -27,11 +27,10 @@
27#include "udf_i.h" 27#include "udf_i.h"
28#include "udf_sb.h" 28#include "udf_sb.h"
29 29
30#define udf_clear_bit(nr, addr) ext2_clear_bit(nr, addr) 30#define udf_clear_bit __test_and_clear_bit_le
31#define udf_set_bit(nr, addr) ext2_set_bit(nr, addr) 31#define udf_set_bit __test_and_set_bit_le
32#define udf_test_bit(nr, addr) ext2_test_bit(nr, addr) 32#define udf_test_bit test_bit_le
33#define udf_find_next_one_bit(addr, size, offset) \ 33#define udf_find_next_one_bit find_next_bit_le
34 ext2_find_next_bit(addr, size, offset)
35 34
36static int read_block_bitmap(struct super_block *sb, 35static int read_block_bitmap(struct super_block *sb,
37 struct udf_bitmap *bitmap, unsigned int block, 36 struct udf_bitmap *bitmap, unsigned int block,
@@ -297,7 +296,7 @@ repeat:
297 break; 296 break;
298 } 297 }
299 } else { 298 } else {
300 bit = udf_find_next_one_bit((char *)bh->b_data, 299 bit = udf_find_next_one_bit(bh->b_data,
301 sb->s_blocksize << 3, 300 sb->s_blocksize << 3,
302 group_start << 3); 301 group_start << 3);
303 if (bit < sb->s_blocksize << 3) 302 if (bit < sb->s_blocksize << 3)
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 89c78486cbbe..f391a2adc699 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -123,8 +123,8 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
123 if (inode->i_sb->s_blocksize < 123 if (inode->i_sb->s_blocksize <
124 (udf_file_entry_alloc_offset(inode) + 124 (udf_file_entry_alloc_offset(inode) +
125 pos + count)) { 125 pos + count)) {
126 udf_expand_file_adinicb(inode, pos + count, &err); 126 err = udf_expand_file_adinicb(inode);
127 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 127 if (err) {
128 udf_debug("udf_expand_adinicb: err=%d\n", err); 128 udf_debug("udf_expand_adinicb: err=%d\n", err);
129 up_write(&iinfo->i_data_sem); 129 up_write(&iinfo->i_data_sem);
130 return err; 130 return err;
@@ -237,7 +237,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
237 237
238 if ((attr->ia_valid & ATTR_SIZE) && 238 if ((attr->ia_valid & ATTR_SIZE) &&
239 attr->ia_size != i_size_read(inode)) { 239 attr->ia_size != i_size_read(inode)) {
240 error = vmtruncate(inode, attr->ia_size); 240 error = udf_setsize(inode, attr->ia_size);
241 if (error) 241 if (error)
242 return error; 242 return error;
243 } 243 }
@@ -249,5 +249,4 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
249 249
250const struct inode_operations udf_file_inode_operations = { 250const struct inode_operations udf_file_inode_operations = {
251 .setattr = udf_setattr, 251 .setattr = udf_setattr,
252 .truncate = udf_truncate,
253}; 252};
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index c6a2e782b97b..ccc814321414 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -73,14 +73,12 @@ void udf_evict_inode(struct inode *inode)
73 struct udf_inode_info *iinfo = UDF_I(inode); 73 struct udf_inode_info *iinfo = UDF_I(inode);
74 int want_delete = 0; 74 int want_delete = 0;
75 75
76 truncate_inode_pages(&inode->i_data, 0);
77
78 if (!inode->i_nlink && !is_bad_inode(inode)) { 76 if (!inode->i_nlink && !is_bad_inode(inode)) {
79 want_delete = 1; 77 want_delete = 1;
80 inode->i_size = 0; 78 udf_setsize(inode, 0);
81 udf_truncate(inode);
82 udf_update_inode(inode, IS_SYNC(inode)); 79 udf_update_inode(inode, IS_SYNC(inode));
83 } 80 } else
81 truncate_inode_pages(&inode->i_data, 0);
84 invalidate_inode_buffers(inode); 82 invalidate_inode_buffers(inode);
85 end_writeback(inode); 83 end_writeback(inode);
86 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && 84 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB &&
@@ -117,9 +115,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
117 115
118 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); 116 ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block);
119 if (unlikely(ret)) { 117 if (unlikely(ret)) {
120 loff_t isize = mapping->host->i_size; 118 struct inode *inode = mapping->host;
121 if (pos + len > isize) 119 struct udf_inode_info *iinfo = UDF_I(inode);
122 vmtruncate(mapping->host, isize); 120 loff_t isize = inode->i_size;
121
122 if (pos + len > isize) {
123 truncate_pagecache(inode, pos + len, isize);
124 if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
125 down_write(&iinfo->i_data_sem);
126 udf_truncate_extents(inode);
127 up_write(&iinfo->i_data_sem);
128 }
129 }
123 } 130 }
124 131
125 return ret; 132 return ret;
@@ -139,30 +146,31 @@ const struct address_space_operations udf_aops = {
139 .bmap = udf_bmap, 146 .bmap = udf_bmap,
140}; 147};
141 148
142void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err) 149int udf_expand_file_adinicb(struct inode *inode)
143{ 150{
144 struct page *page; 151 struct page *page;
145 char *kaddr; 152 char *kaddr;
146 struct udf_inode_info *iinfo = UDF_I(inode); 153 struct udf_inode_info *iinfo = UDF_I(inode);
154 int err;
147 struct writeback_control udf_wbc = { 155 struct writeback_control udf_wbc = {
148 .sync_mode = WB_SYNC_NONE, 156 .sync_mode = WB_SYNC_NONE,
149 .nr_to_write = 1, 157 .nr_to_write = 1,
150 }; 158 };
151 159
152 /* from now on we have normal address_space methods */
153 inode->i_data.a_ops = &udf_aops;
154
155 if (!iinfo->i_lenAlloc) { 160 if (!iinfo->i_lenAlloc) {
156 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) 161 if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
157 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; 162 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
158 else 163 else
159 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; 164 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
165 /* from now on we have normal address_space methods */
166 inode->i_data.a_ops = &udf_aops;
160 mark_inode_dirty(inode); 167 mark_inode_dirty(inode);
161 return; 168 return 0;
162 } 169 }
163 170
164 page = grab_cache_page(inode->i_mapping, 0); 171 page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
165 BUG_ON(!PageLocked(page)); 172 if (!page)
173 return -ENOMEM;
166 174
167 if (!PageUptodate(page)) { 175 if (!PageUptodate(page)) {
168 kaddr = kmap(page); 176 kaddr = kmap(page);
@@ -181,11 +189,24 @@ void udf_expand_file_adinicb(struct inode *inode, int newsize, int *err)
181 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; 189 iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
182 else 190 else
183 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; 191 iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
184 192 /* from now on we have normal address_space methods */
185 inode->i_data.a_ops->writepage(page, &udf_wbc); 193 inode->i_data.a_ops = &udf_aops;
194 err = inode->i_data.a_ops->writepage(page, &udf_wbc);
195 if (err) {
196 /* Restore everything back so that we don't lose data... */
197 lock_page(page);
198 kaddr = kmap(page);
199 memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
200 inode->i_size);
201 kunmap(page);
202 unlock_page(page);
203 iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
204 inode->i_data.a_ops = &udf_adinicb_aops;
205 }
186 page_cache_release(page); 206 page_cache_release(page);
187
188 mark_inode_dirty(inode); 207 mark_inode_dirty(inode);
208
209 return err;
189} 210}
190 211
191struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block, 212struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, int *block,
@@ -348,8 +369,10 @@ static struct buffer_head *udf_getblk(struct inode *inode, long block,
348} 369}
349 370
350/* Extend the file by 'blocks' blocks, return the number of extents added */ 371/* Extend the file by 'blocks' blocks, return the number of extents added */
351int udf_extend_file(struct inode *inode, struct extent_position *last_pos, 372static int udf_do_extend_file(struct inode *inode,
352 struct kernel_long_ad *last_ext, sector_t blocks) 373 struct extent_position *last_pos,
374 struct kernel_long_ad *last_ext,
375 sector_t blocks)
353{ 376{
354 sector_t add; 377 sector_t add;
355 int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); 378 int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
@@ -357,6 +380,7 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
357 struct kernel_lb_addr prealloc_loc = {}; 380 struct kernel_lb_addr prealloc_loc = {};
358 int prealloc_len = 0; 381 int prealloc_len = 0;
359 struct udf_inode_info *iinfo; 382 struct udf_inode_info *iinfo;
383 int err;
360 384
361 /* The previous extent is fake and we should not extend by anything 385 /* The previous extent is fake and we should not extend by anything
362 * - there's nothing to do... */ 386 * - there's nothing to do... */
@@ -422,26 +446,29 @@ int udf_extend_file(struct inode *inode, struct extent_position *last_pos,
422 /* Create enough extents to cover the whole hole */ 446 /* Create enough extents to cover the whole hole */
423 while (blocks > add) { 447 while (blocks > add) {
424 blocks -= add; 448 blocks -= add;
425 if (udf_add_aext(inode, last_pos, &last_ext->extLocation, 449 err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
426 last_ext->extLength, 1) == -1) 450 last_ext->extLength, 1);
427 return -1; 451 if (err)
452 return err;
428 count++; 453 count++;
429 } 454 }
430 if (blocks) { 455 if (blocks) {
431 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | 456 last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED |
432 (blocks << sb->s_blocksize_bits); 457 (blocks << sb->s_blocksize_bits);
433 if (udf_add_aext(inode, last_pos, &last_ext->extLocation, 458 err = udf_add_aext(inode, last_pos, &last_ext->extLocation,
434 last_ext->extLength, 1) == -1) 459 last_ext->extLength, 1);
435 return -1; 460 if (err)
461 return err;
436 count++; 462 count++;
437 } 463 }
438 464
439out: 465out:
440 /* Do we have some preallocated blocks saved? */ 466 /* Do we have some preallocated blocks saved? */
441 if (prealloc_len) { 467 if (prealloc_len) {
442 if (udf_add_aext(inode, last_pos, &prealloc_loc, 468 err = udf_add_aext(inode, last_pos, &prealloc_loc,
443 prealloc_len, 1) == -1) 469 prealloc_len, 1);
444 return -1; 470 if (err)
471 return err;
445 last_ext->extLocation = prealloc_loc; 472 last_ext->extLocation = prealloc_loc;
446 last_ext->extLength = prealloc_len; 473 last_ext->extLength = prealloc_len;
447 count++; 474 count++;
@@ -453,11 +480,68 @@ out:
453 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 480 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
454 last_pos->offset -= sizeof(struct long_ad); 481 last_pos->offset -= sizeof(struct long_ad);
455 else 482 else
456 return -1; 483 return -EIO;
457 484
458 return count; 485 return count;
459} 486}
460 487
488static int udf_extend_file(struct inode *inode, loff_t newsize)
489{
490
491 struct extent_position epos;
492 struct kernel_lb_addr eloc;
493 uint32_t elen;
494 int8_t etype;
495 struct super_block *sb = inode->i_sb;
496 sector_t first_block = newsize >> sb->s_blocksize_bits, offset;
497 int adsize;
498 struct udf_inode_info *iinfo = UDF_I(inode);
499 struct kernel_long_ad extent;
500 int err;
501
502 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
503 adsize = sizeof(struct short_ad);
504 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
505 adsize = sizeof(struct long_ad);
506 else
507 BUG();
508
509 etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
510
511 /* File has extent covering the new size (could happen when extending
512 * inside a block)? */
513 if (etype != -1)
514 return 0;
515 if (newsize & (sb->s_blocksize - 1))
516 offset++;
517 /* Extended file just to the boundary of the last file block? */
518 if (offset == 0)
519 return 0;
520
521 /* Truncate is extending the file by 'offset' blocks */
522 if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) ||
523 (epos.bh && epos.offset == sizeof(struct allocExtDesc))) {
524 /* File has no extents at all or has empty last
525 * indirect extent! Create a fake extent... */
526 extent.extLocation.logicalBlockNum = 0;
527 extent.extLocation.partitionReferenceNum = 0;
528 extent.extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
529 } else {
530 epos.offset -= adsize;
531 etype = udf_next_aext(inode, &epos, &extent.extLocation,
532 &extent.extLength, 0);
533 extent.extLength |= etype << 30;
534 }
535 err = udf_do_extend_file(inode, &epos, &extent, offset);
536 if (err < 0)
537 goto out;
538 err = 0;
539 iinfo->i_lenExtents = newsize;
540out:
541 brelse(epos.bh);
542 return err;
543}
544
461static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, 545static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
462 int *err, sector_t *phys, int *new) 546 int *err, sector_t *phys, int *new)
463{ 547{
@@ -540,7 +624,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
540 elen = EXT_RECORDED_ALLOCATED | 624 elen = EXT_RECORDED_ALLOCATED |
541 ((elen + inode->i_sb->s_blocksize - 1) & 625 ((elen + inode->i_sb->s_blocksize - 1) &
542 ~(inode->i_sb->s_blocksize - 1)); 626 ~(inode->i_sb->s_blocksize - 1));
543 etype = udf_write_aext(inode, &cur_epos, &eloc, elen, 1); 627 udf_write_aext(inode, &cur_epos, &eloc, elen, 1);
544 } 628 }
545 brelse(prev_epos.bh); 629 brelse(prev_epos.bh);
546 brelse(cur_epos.bh); 630 brelse(cur_epos.bh);
@@ -564,19 +648,17 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
564 memset(&laarr[0].extLocation, 0x00, 648 memset(&laarr[0].extLocation, 0x00,
565 sizeof(struct kernel_lb_addr)); 649 sizeof(struct kernel_lb_addr));
566 laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED; 650 laarr[0].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED;
567 /* Will udf_extend_file() create real extent from 651 /* Will udf_do_extend_file() create real extent from
568 a fake one? */ 652 a fake one? */
569 startnum = (offset > 0); 653 startnum = (offset > 0);
570 } 654 }
571 /* Create extents for the hole between EOF and offset */ 655 /* Create extents for the hole between EOF and offset */
572 ret = udf_extend_file(inode, &prev_epos, laarr, offset); 656 ret = udf_do_extend_file(inode, &prev_epos, laarr, offset);
573 if (ret == -1) { 657 if (ret < 0) {
574 brelse(prev_epos.bh); 658 brelse(prev_epos.bh);
575 brelse(cur_epos.bh); 659 brelse(cur_epos.bh);
576 brelse(next_epos.bh); 660 brelse(next_epos.bh);
577 /* We don't really know the error here so we just make 661 *err = ret;
578 * something up */
579 *err = -ENOSPC;
580 return NULL; 662 return NULL;
581 } 663 }
582 c = 0; 664 c = 0;
@@ -1005,52 +1087,66 @@ struct buffer_head *udf_bread(struct inode *inode, int block,
1005 return NULL; 1087 return NULL;
1006} 1088}
1007 1089
1008void udf_truncate(struct inode *inode) 1090int udf_setsize(struct inode *inode, loff_t newsize)
1009{ 1091{
1010 int offset;
1011 int err; 1092 int err;
1012 struct udf_inode_info *iinfo; 1093 struct udf_inode_info *iinfo;
1094 int bsize = 1 << inode->i_blkbits;
1013 1095
1014 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 1096 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1015 S_ISLNK(inode->i_mode))) 1097 S_ISLNK(inode->i_mode)))
1016 return; 1098 return -EINVAL;
1017 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1099 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1018 return; 1100 return -EPERM;
1019 1101
1020 iinfo = UDF_I(inode); 1102 iinfo = UDF_I(inode);
1021 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1103 if (newsize > inode->i_size) {
1022 down_write(&iinfo->i_data_sem); 1104 down_write(&iinfo->i_data_sem);
1023 if (inode->i_sb->s_blocksize < 1105 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1024 (udf_file_entry_alloc_offset(inode) + 1106 if (bsize <
1025 inode->i_size)) { 1107 (udf_file_entry_alloc_offset(inode) + newsize)) {
1026 udf_expand_file_adinicb(inode, inode->i_size, &err); 1108 err = udf_expand_file_adinicb(inode);
1027 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { 1109 if (err) {
1028 inode->i_size = iinfo->i_lenAlloc; 1110 up_write(&iinfo->i_data_sem);
1029 up_write(&iinfo->i_data_sem); 1111 return err;
1030 return; 1112 }
1031 } else 1113 } else
1032 udf_truncate_extents(inode); 1114 iinfo->i_lenAlloc = newsize;
1033 } else { 1115 }
1034 offset = inode->i_size & (inode->i_sb->s_blocksize - 1); 1116 err = udf_extend_file(inode, newsize);
1035 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + offset, 1117 if (err) {
1036 0x00, inode->i_sb->s_blocksize - 1118 up_write(&iinfo->i_data_sem);
1037 offset - udf_file_entry_alloc_offset(inode)); 1119 return err;
1038 iinfo->i_lenAlloc = inode->i_size;
1039 } 1120 }
1121 truncate_setsize(inode, newsize);
1040 up_write(&iinfo->i_data_sem); 1122 up_write(&iinfo->i_data_sem);
1041 } else { 1123 } else {
1042 block_truncate_page(inode->i_mapping, inode->i_size, 1124 if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
1043 udf_get_block); 1125 down_write(&iinfo->i_data_sem);
1126 memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize,
1127 0x00, bsize - newsize -
1128 udf_file_entry_alloc_offset(inode));
1129 iinfo->i_lenAlloc = newsize;
1130 truncate_setsize(inode, newsize);
1131 up_write(&iinfo->i_data_sem);
1132 goto update_time;
1133 }
1134 err = block_truncate_page(inode->i_mapping, newsize,
1135 udf_get_block);
1136 if (err)
1137 return err;
1044 down_write(&iinfo->i_data_sem); 1138 down_write(&iinfo->i_data_sem);
1139 truncate_setsize(inode, newsize);
1045 udf_truncate_extents(inode); 1140 udf_truncate_extents(inode);
1046 up_write(&iinfo->i_data_sem); 1141 up_write(&iinfo->i_data_sem);
1047 } 1142 }
1048 1143update_time:
1049 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); 1144 inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
1050 if (IS_SYNC(inode)) 1145 if (IS_SYNC(inode))
1051 udf_sync_inode(inode); 1146 udf_sync_inode(inode);
1052 else 1147 else
1053 mark_inode_dirty(inode); 1148 mark_inode_dirty(inode);
1149 return 0;
1054} 1150}
1055 1151
1056static void __udf_read_inode(struct inode *inode) 1152static void __udf_read_inode(struct inode *inode)
@@ -1637,14 +1733,13 @@ struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino)
1637 return NULL; 1733 return NULL;
1638} 1734}
1639 1735
1640int8_t udf_add_aext(struct inode *inode, struct extent_position *epos, 1736int udf_add_aext(struct inode *inode, struct extent_position *epos,
1641 struct kernel_lb_addr *eloc, uint32_t elen, int inc) 1737 struct kernel_lb_addr *eloc, uint32_t elen, int inc)
1642{ 1738{
1643 int adsize; 1739 int adsize;
1644 struct short_ad *sad = NULL; 1740 struct short_ad *sad = NULL;
1645 struct long_ad *lad = NULL; 1741 struct long_ad *lad = NULL;
1646 struct allocExtDesc *aed; 1742 struct allocExtDesc *aed;
1647 int8_t etype;
1648 uint8_t *ptr; 1743 uint8_t *ptr;
1649 struct udf_inode_info *iinfo = UDF_I(inode); 1744 struct udf_inode_info *iinfo = UDF_I(inode);
1650 1745
@@ -1660,7 +1755,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1660 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) 1755 else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG)
1661 adsize = sizeof(struct long_ad); 1756 adsize = sizeof(struct long_ad);
1662 else 1757 else
1663 return -1; 1758 return -EIO;
1664 1759
1665 if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) { 1760 if (epos->offset + (2 * adsize) > inode->i_sb->s_blocksize) {
1666 unsigned char *sptr, *dptr; 1761 unsigned char *sptr, *dptr;
@@ -1672,12 +1767,12 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1672 obloc.partitionReferenceNum, 1767 obloc.partitionReferenceNum,
1673 obloc.logicalBlockNum, &err); 1768 obloc.logicalBlockNum, &err);
1674 if (!epos->block.logicalBlockNum) 1769 if (!epos->block.logicalBlockNum)
1675 return -1; 1770 return -ENOSPC;
1676 nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, 1771 nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb,
1677 &epos->block, 1772 &epos->block,
1678 0)); 1773 0));
1679 if (!nbh) 1774 if (!nbh)
1680 return -1; 1775 return -EIO;
1681 lock_buffer(nbh); 1776 lock_buffer(nbh);
1682 memset(nbh->b_data, 0x00, inode->i_sb->s_blocksize); 1777 memset(nbh->b_data, 0x00, inode->i_sb->s_blocksize);
1683 set_buffer_uptodate(nbh); 1778 set_buffer_uptodate(nbh);
@@ -1746,7 +1841,7 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1746 epos->bh = nbh; 1841 epos->bh = nbh;
1747 } 1842 }
1748 1843
1749 etype = udf_write_aext(inode, epos, eloc, elen, inc); 1844 udf_write_aext(inode, epos, eloc, elen, inc);
1750 1845
1751 if (!epos->bh) { 1846 if (!epos->bh) {
1752 iinfo->i_lenAlloc += adsize; 1847 iinfo->i_lenAlloc += adsize;
@@ -1764,11 +1859,11 @@ int8_t udf_add_aext(struct inode *inode, struct extent_position *epos,
1764 mark_buffer_dirty_inode(epos->bh, inode); 1859 mark_buffer_dirty_inode(epos->bh, inode);
1765 } 1860 }
1766 1861
1767 return etype; 1862 return 0;
1768} 1863}
1769 1864
1770int8_t udf_write_aext(struct inode *inode, struct extent_position *epos, 1865void udf_write_aext(struct inode *inode, struct extent_position *epos,
1771 struct kernel_lb_addr *eloc, uint32_t elen, int inc) 1866 struct kernel_lb_addr *eloc, uint32_t elen, int inc)
1772{ 1867{
1773 int adsize; 1868 int adsize;
1774 uint8_t *ptr; 1869 uint8_t *ptr;
@@ -1798,7 +1893,7 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1798 adsize = sizeof(struct long_ad); 1893 adsize = sizeof(struct long_ad);
1799 break; 1894 break;
1800 default: 1895 default:
1801 return -1; 1896 return;
1802 } 1897 }
1803 1898
1804 if (epos->bh) { 1899 if (epos->bh) {
@@ -1817,8 +1912,6 @@ int8_t udf_write_aext(struct inode *inode, struct extent_position *epos,
1817 1912
1818 if (inc) 1913 if (inc)
1819 epos->offset += adsize; 1914 epos->offset += adsize;
1820
1821 return (elen >> 30);
1822} 1915}
1823 1916
1824int8_t udf_next_aext(struct inode *inode, struct extent_position *epos, 1917int8_t udf_next_aext(struct inode *inode, struct extent_position *epos,
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 2be0f9eb86d2..f1dce848ef96 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -32,6 +32,8 @@
32#include <linux/crc-itu-t.h> 32#include <linux/crc-itu-t.h>
33#include <linux/exportfs.h> 33#include <linux/exportfs.h>
34 34
35enum { UDF_MAX_LINKS = 0xffff };
36
35static inline int udf_match(int len1, const unsigned char *name1, int len2, 37static inline int udf_match(int len1, const unsigned char *name1, int len2,
36 const unsigned char *name2) 38 const unsigned char *name2)
37{ 39{
@@ -650,7 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, int mode)
650 struct udf_inode_info *iinfo; 652 struct udf_inode_info *iinfo;
651 653
652 err = -EMLINK; 654 err = -EMLINK;
653 if (dir->i_nlink >= (256 << sizeof(dir->i_nlink)) - 1) 655 if (dir->i_nlink >= UDF_MAX_LINKS)
654 goto out; 656 goto out;
655 657
656 err = -EIO; 658 err = -EIO;
@@ -1034,9 +1036,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
1034 struct fileIdentDesc cfi, *fi; 1036 struct fileIdentDesc cfi, *fi;
1035 int err; 1037 int err;
1036 1038
1037 if (inode->i_nlink >= (256 << sizeof(inode->i_nlink)) - 1) { 1039 if (inode->i_nlink >= UDF_MAX_LINKS)
1038 return -EMLINK; 1040 return -EMLINK;
1039 }
1040 1041
1041 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); 1042 fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err);
1042 if (!fi) { 1043 if (!fi) {
@@ -1131,9 +1132,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1131 goto end_rename; 1132 goto end_rename;
1132 1133
1133 retval = -EMLINK; 1134 retval = -EMLINK;
1134 if (!new_inode && 1135 if (!new_inode && new_dir->i_nlink >= UDF_MAX_LINKS)
1135 new_dir->i_nlink >=
1136 (256 << sizeof(new_dir->i_nlink)) - 1)
1137 goto end_rename; 1136 goto end_rename;
1138 } 1137 }
1139 if (!nfi) { 1138 if (!nfi) {
@@ -1287,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
1287 struct fid *fid = (struct fid *)fh; 1286 struct fid *fid = (struct fid *)fh;
1288 int type = FILEID_UDF_WITHOUT_PARENT; 1287 int type = FILEID_UDF_WITHOUT_PARENT;
1289 1288
1290 if (len < 3 || (connectable && len < 5)) 1289 if (connectable && (len < 5)) {
1290 *lenp = 5;
1291 return 255; 1291 return 255;
1292 } else if (len < 3) {
1293 *lenp = 3;
1294 return 255;
1295 }
1292 1296
1293 *lenp = 3; 1297 *lenp = 3;
1294 fid->udf.block = location.logicalBlockNum; 1298 fid->udf.block = location.logicalBlockNum;
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 225527cdc885..8424308db4b4 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -197,6 +197,11 @@ static void udf_update_alloc_ext_desc(struct inode *inode,
197 mark_buffer_dirty_inode(epos->bh, inode); 197 mark_buffer_dirty_inode(epos->bh, inode);
198} 198}
199 199
200/*
201 * Truncate extents of inode to inode->i_size. This function can be used only
202 * for making file shorter. For making file longer, udf_extend_file() has to
203 * be used.
204 */
200void udf_truncate_extents(struct inode *inode) 205void udf_truncate_extents(struct inode *inode)
201{ 206{
202 struct extent_position epos; 207 struct extent_position epos;
@@ -219,96 +224,65 @@ void udf_truncate_extents(struct inode *inode)
219 etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); 224 etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset);
220 byte_offset = (offset << sb->s_blocksize_bits) + 225 byte_offset = (offset << sb->s_blocksize_bits) +
221 (inode->i_size & (sb->s_blocksize - 1)); 226 (inode->i_size & (sb->s_blocksize - 1));
222 if (etype != -1) { 227 if (etype == -1) {
223 epos.offset -= adsize; 228 /* We should extend the file? */
224 extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset); 229 WARN_ON(byte_offset);
225 epos.offset += adsize; 230 return;
226 if (byte_offset) 231 }
227 lenalloc = epos.offset; 232 epos.offset -= adsize;
228 else 233 extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset);
229 lenalloc = epos.offset - adsize; 234 epos.offset += adsize;
230 235 if (byte_offset)
231 if (!epos.bh) 236 lenalloc = epos.offset;
232 lenalloc -= udf_file_entry_alloc_offset(inode); 237 else
233 else 238 lenalloc = epos.offset - adsize;
234 lenalloc -= sizeof(struct allocExtDesc);
235
236 while ((etype = udf_current_aext(inode, &epos, &eloc,
237 &elen, 0)) != -1) {
238 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
239 udf_write_aext(inode, &epos, &neloc, nelen, 0);
240 if (indirect_ext_len) {
241 /* We managed to free all extents in the
242 * indirect extent - free it too */
243 BUG_ON(!epos.bh);
244 udf_free_blocks(sb, inode, &epos.block,
245 0, indirect_ext_len);
246 } else if (!epos.bh) {
247 iinfo->i_lenAlloc = lenalloc;
248 mark_inode_dirty(inode);
249 } else
250 udf_update_alloc_ext_desc(inode,
251 &epos, lenalloc);
252 brelse(epos.bh);
253 epos.offset = sizeof(struct allocExtDesc);
254 epos.block = eloc;
255 epos.bh = udf_tread(sb,
256 udf_get_lb_pblock(sb, &eloc, 0));
257 if (elen)
258 indirect_ext_len =
259 (elen + sb->s_blocksize - 1) >>
260 sb->s_blocksize_bits;
261 else
262 indirect_ext_len = 1;
263 } else {
264 extent_trunc(inode, &epos, &eloc, etype,
265 elen, 0);
266 epos.offset += adsize;
267 }
268 }
269 239
270 if (indirect_ext_len) { 240 if (!epos.bh)
271 BUG_ON(!epos.bh); 241 lenalloc -= udf_file_entry_alloc_offset(inode);
272 udf_free_blocks(sb, inode, &epos.block, 0, 242 else
273 indirect_ext_len); 243 lenalloc -= sizeof(struct allocExtDesc);
274 } else if (!epos.bh) {
275 iinfo->i_lenAlloc = lenalloc;
276 mark_inode_dirty(inode);
277 } else
278 udf_update_alloc_ext_desc(inode, &epos, lenalloc);
279 } else if (inode->i_size) {
280 if (byte_offset) {
281 struct kernel_long_ad extent;
282 244
283 /* 245 while ((etype = udf_current_aext(inode, &epos, &eloc,
284 * OK, there is not extent covering inode->i_size and 246 &elen, 0)) != -1) {
285 * no extent above inode->i_size => truncate is 247 if (etype == (EXT_NEXT_EXTENT_ALLOCDECS >> 30)) {
286 * extending the file by 'offset' blocks. 248 udf_write_aext(inode, &epos, &neloc, nelen, 0);
287 */ 249 if (indirect_ext_len) {
288 if ((!epos.bh && 250 /* We managed to free all extents in the
289 epos.offset == 251 * indirect extent - free it too */
290 udf_file_entry_alloc_offset(inode)) || 252 BUG_ON(!epos.bh);
291 (epos.bh && epos.offset == 253 udf_free_blocks(sb, inode, &epos.block,
292 sizeof(struct allocExtDesc))) { 254 0, indirect_ext_len);
293 /* File has no extents at all or has empty last 255 } else if (!epos.bh) {
294 * indirect extent! Create a fake extent... */ 256 iinfo->i_lenAlloc = lenalloc;
295 extent.extLocation.logicalBlockNum = 0; 257 mark_inode_dirty(inode);
296 extent.extLocation.partitionReferenceNum = 0; 258 } else
297 extent.extLength = 259 udf_update_alloc_ext_desc(inode,
298 EXT_NOT_RECORDED_NOT_ALLOCATED; 260 &epos, lenalloc);
299 } else { 261 brelse(epos.bh);
300 epos.offset -= adsize; 262 epos.offset = sizeof(struct allocExtDesc);
301 etype = udf_next_aext(inode, &epos, 263 epos.block = eloc;
302 &extent.extLocation, 264 epos.bh = udf_tread(sb,
303 &extent.extLength, 0); 265 udf_get_lb_pblock(sb, &eloc, 0));
304 extent.extLength |= etype << 30; 266 if (elen)
305 } 267 indirect_ext_len =
306 udf_extend_file(inode, &epos, &extent, 268 (elen + sb->s_blocksize - 1) >>
307 offset + 269 sb->s_blocksize_bits;
308 ((inode->i_size & 270 else
309 (sb->s_blocksize - 1)) != 0)); 271 indirect_ext_len = 1;
272 } else {
273 extent_trunc(inode, &epos, &eloc, etype, elen, 0);
274 epos.offset += adsize;
310 } 275 }
311 } 276 }
277
278 if (indirect_ext_len) {
279 BUG_ON(!epos.bh);
280 udf_free_blocks(sb, inode, &epos.block, 0, indirect_ext_len);
281 } else if (!epos.bh) {
282 iinfo->i_lenAlloc = lenalloc;
283 mark_inode_dirty(inode);
284 } else
285 udf_update_alloc_ext_desc(inode, &epos, lenalloc);
312 iinfo->i_lenExtents = inode->i_size; 286 iinfo->i_lenExtents = inode->i_size;
313 287
314 brelse(epos.bh); 288 brelse(epos.bh);
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index eba48209f9f3..dbd52d4b5eed 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -136,22 +136,20 @@ extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
136extern long udf_ioctl(struct file *, unsigned int, unsigned long); 136extern long udf_ioctl(struct file *, unsigned int, unsigned long);
137/* inode.c */ 137/* inode.c */
138extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *); 138extern struct inode *udf_iget(struct super_block *, struct kernel_lb_addr *);
139extern void udf_expand_file_adinicb(struct inode *, int, int *); 139extern int udf_expand_file_adinicb(struct inode *);
140extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); 140extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *);
141extern struct buffer_head *udf_bread(struct inode *, int, int, int *); 141extern struct buffer_head *udf_bread(struct inode *, int, int, int *);
142extern void udf_truncate(struct inode *); 142extern int udf_setsize(struct inode *, loff_t);
143extern void udf_read_inode(struct inode *); 143extern void udf_read_inode(struct inode *);
144extern void udf_evict_inode(struct inode *); 144extern void udf_evict_inode(struct inode *);
145extern int udf_write_inode(struct inode *, struct writeback_control *wbc); 145extern int udf_write_inode(struct inode *, struct writeback_control *wbc);
146extern long udf_block_map(struct inode *, sector_t); 146extern long udf_block_map(struct inode *, sector_t);
147extern int udf_extend_file(struct inode *, struct extent_position *,
148 struct kernel_long_ad *, sector_t);
149extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *, 147extern int8_t inode_bmap(struct inode *, sector_t, struct extent_position *,
150 struct kernel_lb_addr *, uint32_t *, sector_t *); 148 struct kernel_lb_addr *, uint32_t *, sector_t *);
151extern int8_t udf_add_aext(struct inode *, struct extent_position *, 149extern int udf_add_aext(struct inode *, struct extent_position *,
150 struct kernel_lb_addr *, uint32_t, int);
151extern void udf_write_aext(struct inode *, struct extent_position *,
152 struct kernel_lb_addr *, uint32_t, int); 152 struct kernel_lb_addr *, uint32_t, int);
153extern int8_t udf_write_aext(struct inode *, struct extent_position *,
154 struct kernel_lb_addr *, uint32_t, int);
155extern int8_t udf_delete_aext(struct inode *, struct extent_position, 153extern int8_t udf_delete_aext(struct inode *, struct extent_position,
156 struct kernel_lb_addr, uint32_t); 154 struct kernel_lb_addr, uint32_t);
157extern int8_t udf_next_aext(struct inode *, struct extent_position *, 155extern int8_t udf_next_aext(struct inode *, struct extent_position *,
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig
index 30c8f223253d..e4f10a40768a 100644
--- a/fs/ufs/Kconfig
+++ b/fs/ufs/Kconfig
@@ -1,7 +1,6 @@
1config UFS_FS 1config UFS_FS
2 tristate "UFS file system support (read only)" 2 tristate "UFS file system support (read only)"
3 depends on BLOCK 3 depends on BLOCK
4 depends on BKL # probably fixable
5 help 4 help
6 BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, 5 BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD,
7 OpenBSD and NeXTstep) use a file system called UFS. Some System V 6 OpenBSD and NeXTstep) use a file system called UFS. Some System V
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 2b251f2093af..03c255f12df5 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -34,7 +34,6 @@
34#include <linux/stat.h> 34#include <linux/stat.h>
35#include <linux/string.h> 35#include <linux/string.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/smp_lock.h>
38#include <linux/buffer_head.h> 37#include <linux/buffer_head.h>
39#include <linux/writeback.h> 38#include <linux/writeback.h>
40 39
@@ -43,7 +42,7 @@
43#include "swab.h" 42#include "swab.h"
44#include "util.h" 43#include "util.h"
45 44
46static u64 ufs_frag_map(struct inode *inode, sector_t frag); 45static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock);
47 46
48static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) 47static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4])
49{ 48{
@@ -82,7 +81,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off
82 * the begining of the filesystem. 81 * the begining of the filesystem.
83 */ 82 */
84 83
85static u64 ufs_frag_map(struct inode *inode, sector_t frag) 84static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock)
86{ 85{
87 struct ufs_inode_info *ufsi = UFS_I(inode); 86 struct ufs_inode_info *ufsi = UFS_I(inode);
88 struct super_block *sb = inode->i_sb; 87 struct super_block *sb = inode->i_sb;
@@ -107,7 +106,8 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag)
107 106
108 p = offsets; 107 p = offsets;
109 108
110 lock_kernel(); 109 if (needs_lock)
110 lock_ufs(sb);
111 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) 111 if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
112 goto ufs2; 112 goto ufs2;
113 113
@@ -152,7 +152,8 @@ ufs2:
152 ret = temp + (u64) (frag & uspi->s_fpbmask); 152 ret = temp + (u64) (frag & uspi->s_fpbmask);
153 153
154out: 154out:
155 unlock_kernel(); 155 if (needs_lock)
156 unlock_ufs(sb);
156 return ret; 157 return ret;
157} 158}
158 159
@@ -415,14 +416,16 @@ out:
415int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) 416int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create)
416{ 417{
417 struct super_block * sb = inode->i_sb; 418 struct super_block * sb = inode->i_sb;
418 struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi; 419 struct ufs_sb_info * sbi = UFS_SB(sb);
420 struct ufs_sb_private_info * uspi = sbi->s_uspi;
419 struct buffer_head * bh; 421 struct buffer_head * bh;
420 int ret, err, new; 422 int ret, err, new;
421 unsigned long ptr,phys; 423 unsigned long ptr,phys;
422 u64 phys64 = 0; 424 u64 phys64 = 0;
425 bool needs_lock = (sbi->mutex_owner != current);
423 426
424 if (!create) { 427 if (!create) {
425 phys64 = ufs_frag_map(inode, fragment); 428 phys64 = ufs_frag_map(inode, fragment, needs_lock);
426 UFSD("phys64 = %llu\n", (unsigned long long)phys64); 429 UFSD("phys64 = %llu\n", (unsigned long long)phys64);
427 if (phys64) 430 if (phys64)
428 map_bh(bh_result, sb, phys64); 431 map_bh(bh_result, sb, phys64);
@@ -436,7 +439,8 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
436 ret = 0; 439 ret = 0;
437 bh = NULL; 440 bh = NULL;
438 441
439 lock_kernel(); 442 if (needs_lock)
443 lock_ufs(sb);
440 444
441 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); 445 UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment);
442 if (fragment > 446 if (fragment >
@@ -498,7 +502,9 @@ out:
498 set_buffer_new(bh_result); 502 set_buffer_new(bh_result);
499 map_bh(bh_result, sb, phys); 503 map_bh(bh_result, sb, phys);
500abort: 504abort:
501 unlock_kernel(); 505 if (needs_lock)
506 unlock_ufs(sb);
507
502 return err; 508 return err;
503 509
504abort_too_big: 510abort_too_big:
@@ -506,48 +512,6 @@ abort_too_big:
506 goto abort; 512 goto abort;
507} 513}
508 514
509static struct buffer_head *ufs_getfrag(struct inode *inode,
510 unsigned int fragment,
511 int create, int *err)
512{
513 struct buffer_head dummy;
514 int error;
515
516 dummy.b_state = 0;
517 dummy.b_blocknr = -1000;
518 error = ufs_getfrag_block(inode, fragment, &dummy, create);
519 *err = error;
520 if (!error && buffer_mapped(&dummy)) {
521 struct buffer_head *bh;
522 bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
523 if (buffer_new(&dummy)) {
524 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
525 set_buffer_uptodate(bh);
526 mark_buffer_dirty(bh);
527 }
528 return bh;
529 }
530 return NULL;
531}
532
533struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment,
534 int create, int * err)
535{
536 struct buffer_head * bh;
537
538 UFSD("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment);
539 bh = ufs_getfrag (inode, fragment, create, err);
540 if (!bh || buffer_uptodate(bh))
541 return bh;
542 ll_rw_block (READ, 1, &bh);
543 wait_on_buffer (bh);
544 if (buffer_uptodate(bh))
545 return bh;
546 brelse (bh);
547 *err = -EIO;
548 return NULL;
549}
550
551static int ufs_writepage(struct page *page, struct writeback_control *wbc) 515static int ufs_writepage(struct page *page, struct writeback_control *wbc)
552{ 516{
553 return block_write_full_page(page,ufs_getfrag_block,wbc); 517 return block_write_full_page(page,ufs_getfrag_block,wbc);
@@ -900,9 +864,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
900int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) 864int ufs_write_inode(struct inode *inode, struct writeback_control *wbc)
901{ 865{
902 int ret; 866 int ret;
903 lock_kernel(); 867 lock_ufs(inode->i_sb);
904 ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); 868 ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
905 unlock_kernel(); 869 unlock_ufs(inode->i_sb);
906 return ret; 870 return ret;
907} 871}
908 872
@@ -922,22 +886,22 @@ void ufs_evict_inode(struct inode * inode)
922 if (want_delete) { 886 if (want_delete) {
923 loff_t old_i_size; 887 loff_t old_i_size;
924 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ 888 /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/
925 lock_kernel(); 889 lock_ufs(inode->i_sb);
926 mark_inode_dirty(inode); 890 mark_inode_dirty(inode);
927 ufs_update_inode(inode, IS_SYNC(inode)); 891 ufs_update_inode(inode, IS_SYNC(inode));
928 old_i_size = inode->i_size; 892 old_i_size = inode->i_size;
929 inode->i_size = 0; 893 inode->i_size = 0;
930 if (inode->i_blocks && ufs_truncate(inode, old_i_size)) 894 if (inode->i_blocks && ufs_truncate(inode, old_i_size))
931 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); 895 ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n");
932 unlock_kernel(); 896 unlock_ufs(inode->i_sb);
933 } 897 }
934 898
935 invalidate_inode_buffers(inode); 899 invalidate_inode_buffers(inode);
936 end_writeback(inode); 900 end_writeback(inode);
937 901
938 if (want_delete) { 902 if (want_delete) {
939 lock_kernel(); 903 lock_ufs(inode->i_sb);
940 ufs_free_inode (inode); 904 ufs_free_inode (inode);
941 unlock_kernel(); 905 unlock_ufs(inode->i_sb);
942 } 906 }
943} 907}
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 12f39b9e4437..29309e25417f 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -29,7 +29,6 @@
29 29
30#include <linux/time.h> 30#include <linux/time.h>
31#include <linux/fs.h> 31#include <linux/fs.h>
32#include <linux/smp_lock.h>
33 32
34#include "ufs_fs.h" 33#include "ufs_fs.h"
35#include "ufs.h" 34#include "ufs.h"
@@ -55,16 +54,16 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
55 if (dentry->d_name.len > UFS_MAXNAMLEN) 54 if (dentry->d_name.len > UFS_MAXNAMLEN)
56 return ERR_PTR(-ENAMETOOLONG); 55 return ERR_PTR(-ENAMETOOLONG);
57 56
58 lock_kernel(); 57 lock_ufs(dir->i_sb);
59 ino = ufs_inode_by_name(dir, &dentry->d_name); 58 ino = ufs_inode_by_name(dir, &dentry->d_name);
60 if (ino) { 59 if (ino) {
61 inode = ufs_iget(dir->i_sb, ino); 60 inode = ufs_iget(dir->i_sb, ino);
62 if (IS_ERR(inode)) { 61 if (IS_ERR(inode)) {
63 unlock_kernel(); 62 unlock_ufs(dir->i_sb);
64 return ERR_CAST(inode); 63 return ERR_CAST(inode);
65 } 64 }
66 } 65 }
67 unlock_kernel(); 66 unlock_ufs(dir->i_sb);
68 d_add(dentry, inode); 67 d_add(dentry, inode);
69 return NULL; 68 return NULL;
70} 69}
@@ -93,9 +92,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode,
93 inode->i_fop = &ufs_file_operations; 92 inode->i_fop = &ufs_file_operations;
94 inode->i_mapping->a_ops = &ufs_aops; 93 inode->i_mapping->a_ops = &ufs_aops;
95 mark_inode_dirty(inode); 94 mark_inode_dirty(inode);
96 lock_kernel(); 95 lock_ufs(dir->i_sb);
97 err = ufs_add_nondir(dentry, inode); 96 err = ufs_add_nondir(dentry, inode);
98 unlock_kernel(); 97 unlock_ufs(dir->i_sb);
99 } 98 }
100 UFSD("END: err=%d\n", err); 99 UFSD("END: err=%d\n", err);
101 return err; 100 return err;
@@ -115,9 +114,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t
115 init_special_inode(inode, mode, rdev); 114 init_special_inode(inode, mode, rdev);
116 ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev); 115 ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev);
117 mark_inode_dirty(inode); 116 mark_inode_dirty(inode);
118 lock_kernel(); 117 lock_ufs(dir->i_sb);
119 err = ufs_add_nondir(dentry, inode); 118 err = ufs_add_nondir(dentry, inode);
120 unlock_kernel(); 119 unlock_ufs(dir->i_sb);
121 } 120 }
122 return err; 121 return err;
123} 122}
@@ -133,7 +132,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
133 if (l > sb->s_blocksize) 132 if (l > sb->s_blocksize)
134 goto out_notlocked; 133 goto out_notlocked;
135 134
136 lock_kernel(); 135 lock_ufs(dir->i_sb);
137 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); 136 inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO);
138 err = PTR_ERR(inode); 137 err = PTR_ERR(inode);
139 if (IS_ERR(inode)) 138 if (IS_ERR(inode))
@@ -156,7 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry,
156 155
157 err = ufs_add_nondir(dentry, inode); 156 err = ufs_add_nondir(dentry, inode);
158out: 157out:
159 unlock_kernel(); 158 unlock_ufs(dir->i_sb);
160out_notlocked: 159out_notlocked:
161 return err; 160 return err;
162 161
@@ -172,9 +171,9 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
172 struct inode *inode = old_dentry->d_inode; 171 struct inode *inode = old_dentry->d_inode;
173 int error; 172 int error;
174 173
175 lock_kernel(); 174 lock_ufs(dir->i_sb);
176 if (inode->i_nlink >= UFS_LINK_MAX) { 175 if (inode->i_nlink >= UFS_LINK_MAX) {
177 unlock_kernel(); 176 unlock_ufs(dir->i_sb);
178 return -EMLINK; 177 return -EMLINK;
179 } 178 }
180 179
@@ -183,7 +182,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
183 ihold(inode); 182 ihold(inode);
184 183
185 error = ufs_add_nondir(dentry, inode); 184 error = ufs_add_nondir(dentry, inode);
186 unlock_kernel(); 185 unlock_ufs(dir->i_sb);
187 return error; 186 return error;
188} 187}
189 188
@@ -195,7 +194,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
195 if (dir->i_nlink >= UFS_LINK_MAX) 194 if (dir->i_nlink >= UFS_LINK_MAX)
196 goto out; 195 goto out;
197 196
198 lock_kernel(); 197 lock_ufs(dir->i_sb);
199 inode_inc_link_count(dir); 198 inode_inc_link_count(dir);
200 199
201 inode = ufs_new_inode(dir, S_IFDIR|mode); 200 inode = ufs_new_inode(dir, S_IFDIR|mode);
@@ -216,7 +215,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
216 err = ufs_add_link(dentry, inode); 215 err = ufs_add_link(dentry, inode);
217 if (err) 216 if (err)
218 goto out_fail; 217 goto out_fail;
219 unlock_kernel(); 218 unlock_ufs(dir->i_sb);
220 219
221 d_instantiate(dentry, inode); 220 d_instantiate(dentry, inode);
222out: 221out:
@@ -228,7 +227,7 @@ out_fail:
228 iput (inode); 227 iput (inode);
229out_dir: 228out_dir:
230 inode_dec_link_count(dir); 229 inode_dec_link_count(dir);
231 unlock_kernel(); 230 unlock_ufs(dir->i_sb);
232 goto out; 231 goto out;
233} 232}
234 233
@@ -259,7 +258,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
259 struct inode * inode = dentry->d_inode; 258 struct inode * inode = dentry->d_inode;
260 int err= -ENOTEMPTY; 259 int err= -ENOTEMPTY;
261 260
262 lock_kernel(); 261 lock_ufs(dir->i_sb);
263 if (ufs_empty_dir (inode)) { 262 if (ufs_empty_dir (inode)) {
264 err = ufs_unlink(dir, dentry); 263 err = ufs_unlink(dir, dentry);
265 if (!err) { 264 if (!err) {
@@ -268,7 +267,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
268 inode_dec_link_count(dir); 267 inode_dec_link_count(dir);
269 } 268 }
270 } 269 }
271 unlock_kernel(); 270 unlock_ufs(dir->i_sb);
272 return err; 271 return err;
273} 272}
274 273
@@ -306,7 +305,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
306 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page); 305 new_de = ufs_find_entry(new_dir, &new_dentry->d_name, &new_page);
307 if (!new_de) 306 if (!new_de)
308 goto out_dir; 307 goto out_dir;
309 inode_inc_link_count(old_inode);
310 ufs_set_link(new_dir, new_de, new_page, old_inode); 308 ufs_set_link(new_dir, new_de, new_page, old_inode);
311 new_inode->i_ctime = CURRENT_TIME_SEC; 309 new_inode->i_ctime = CURRENT_TIME_SEC;
312 if (dir_de) 310 if (dir_de)
@@ -318,12 +316,9 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
318 if (new_dir->i_nlink >= UFS_LINK_MAX) 316 if (new_dir->i_nlink >= UFS_LINK_MAX)
319 goto out_dir; 317 goto out_dir;
320 } 318 }
321 inode_inc_link_count(old_inode);
322 err = ufs_add_link(new_dentry, old_inode); 319 err = ufs_add_link(new_dentry, old_inode);
323 if (err) { 320 if (err)
324 inode_dec_link_count(old_inode);
325 goto out_dir; 321 goto out_dir;
326 }
327 if (dir_de) 322 if (dir_de)
328 inode_inc_link_count(new_dir); 323 inode_inc_link_count(new_dir);
329 } 324 }
@@ -331,12 +326,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
331 /* 326 /*
332 * Like most other Unix systems, set the ctime for inodes on a 327 * Like most other Unix systems, set the ctime for inodes on a
333 * rename. 328 * rename.
334 * inode_dec_link_count() will mark the inode dirty.
335 */ 329 */
336 old_inode->i_ctime = CURRENT_TIME_SEC; 330 old_inode->i_ctime = CURRENT_TIME_SEC;
337 331
338 ufs_delete_entry(old_dir, old_de, old_page); 332 ufs_delete_entry(old_dir, old_de, old_page);
339 inode_dec_link_count(old_inode); 333 mark_inode_dirty(old_inode);
340 334
341 if (dir_de) { 335 if (dir_de) {
342 ufs_set_link(old_inode, dir_de, dir_page, new_dir); 336 ufs_set_link(old_inode, dir_de, dir_page, new_dir);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 2c61ac5d4e48..7693d6293404 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -84,7 +84,6 @@
84#include <linux/blkdev.h> 84#include <linux/blkdev.h>
85#include <linux/init.h> 85#include <linux/init.h>
86#include <linux/parser.h> 86#include <linux/parser.h>
87#include <linux/smp_lock.h>
88#include <linux/buffer_head.h> 87#include <linux/buffer_head.h>
89#include <linux/vfs.h> 88#include <linux/vfs.h>
90#include <linux/log2.h> 89#include <linux/log2.h>
@@ -96,6 +95,26 @@
96#include "swab.h" 95#include "swab.h"
97#include "util.h" 96#include "util.h"
98 97
98void lock_ufs(struct super_block *sb)
99{
100#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT)
101 struct ufs_sb_info *sbi = UFS_SB(sb);
102
103 mutex_lock(&sbi->mutex);
104 sbi->mutex_owner = current;
105#endif
106}
107
108void unlock_ufs(struct super_block *sb)
109{
110#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT)
111 struct ufs_sb_info *sbi = UFS_SB(sb);
112
113 sbi->mutex_owner = NULL;
114 mutex_unlock(&sbi->mutex);
115#endif
116}
117
99static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) 118static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation)
100{ 119{
101 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; 120 struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
@@ -313,7 +332,6 @@ void ufs_panic (struct super_block * sb, const char * function,
313 struct ufs_super_block_first * usb1; 332 struct ufs_super_block_first * usb1;
314 va_list args; 333 va_list args;
315 334
316 lock_kernel();
317 uspi = UFS_SB(sb)->s_uspi; 335 uspi = UFS_SB(sb)->s_uspi;
318 usb1 = ubh_get_usb_first(uspi); 336 usb1 = ubh_get_usb_first(uspi);
319 337
@@ -521,7 +539,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
521 */ 539 */
522 size = uspi->s_cssize; 540 size = uspi->s_cssize;
523 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; 541 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
524 base = space = kmalloc(size, GFP_KERNEL); 542 base = space = kmalloc(size, GFP_NOFS);
525 if (!base) 543 if (!base)
526 goto failed; 544 goto failed;
527 sbi->s_csp = (struct ufs_csum *)space; 545 sbi->s_csp = (struct ufs_csum *)space;
@@ -546,7 +564,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
546 * Read cylinder group (we read only first fragment from block 564 * Read cylinder group (we read only first fragment from block
547 * at this time) and prepare internal data structures for cg caching. 565 * at this time) and prepare internal data structures for cg caching.
548 */ 566 */
549 if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_KERNEL))) 567 if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_NOFS)))
550 goto failed; 568 goto failed;
551 for (i = 0; i < uspi->s_ncg; i++) 569 for (i = 0; i < uspi->s_ncg; i++)
552 sbi->s_ucg[i] = NULL; 570 sbi->s_ucg[i] = NULL;
@@ -564,7 +582,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
564 ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data); 582 ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data);
565 } 583 }
566 for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) { 584 for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) {
567 if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_KERNEL))) 585 if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_NOFS)))
568 goto failed; 586 goto failed;
569 sbi->s_cgno[i] = UFS_CGNO_EMPTY; 587 sbi->s_cgno[i] = UFS_CGNO_EMPTY;
570 } 588 }
@@ -646,8 +664,6 @@ static void ufs_put_super_internal(struct super_block *sb)
646 664
647 UFSD("ENTER\n"); 665 UFSD("ENTER\n");
648 666
649 lock_kernel();
650
651 ufs_put_cstotal(sb); 667 ufs_put_cstotal(sb);
652 size = uspi->s_cssize; 668 size = uspi->s_cssize;
653 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; 669 blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -676,8 +692,6 @@ static void ufs_put_super_internal(struct super_block *sb)
676 kfree (sbi->s_ucg); 692 kfree (sbi->s_ucg);
677 kfree (base); 693 kfree (base);
678 694
679 unlock_kernel();
680
681 UFSD("EXIT\n"); 695 UFSD("EXIT\n");
682} 696}
683 697
@@ -696,8 +710,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
696 unsigned maxsymlen; 710 unsigned maxsymlen;
697 int ret = -EINVAL; 711 int ret = -EINVAL;
698 712
699 lock_kernel();
700
701 uspi = NULL; 713 uspi = NULL;
702 ubh = NULL; 714 ubh = NULL;
703 flags = 0; 715 flags = 0;
@@ -718,6 +730,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
718 goto failed; 730 goto failed;
719 } 731 }
720#endif 732#endif
733 mutex_init(&sbi->mutex);
721 /* 734 /*
722 * Set default mount options 735 * Set default mount options
723 * Parse mount options 736 * Parse mount options
@@ -1165,7 +1178,6 @@ magic_found:
1165 goto failed; 1178 goto failed;
1166 1179
1167 UFSD("EXIT\n"); 1180 UFSD("EXIT\n");
1168 unlock_kernel();
1169 return 0; 1181 return 0;
1170 1182
1171dalloc_failed: 1183dalloc_failed:
@@ -1177,12 +1189,10 @@ failed:
1177 kfree(sbi); 1189 kfree(sbi);
1178 sb->s_fs_info = NULL; 1190 sb->s_fs_info = NULL;
1179 UFSD("EXIT (FAILED)\n"); 1191 UFSD("EXIT (FAILED)\n");
1180 unlock_kernel();
1181 return ret; 1192 return ret;
1182 1193
1183failed_nomem: 1194failed_nomem:
1184 UFSD("EXIT (NOMEM)\n"); 1195 UFSD("EXIT (NOMEM)\n");
1185 unlock_kernel();
1186 return -ENOMEM; 1196 return -ENOMEM;
1187} 1197}
1188 1198
@@ -1193,8 +1203,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
1193 struct ufs_super_block_third * usb3; 1203 struct ufs_super_block_third * usb3;
1194 unsigned flags; 1204 unsigned flags;
1195 1205
1206 lock_ufs(sb);
1196 lock_super(sb); 1207 lock_super(sb);
1197 lock_kernel();
1198 1208
1199 UFSD("ENTER\n"); 1209 UFSD("ENTER\n");
1200 1210
@@ -1213,8 +1223,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
1213 sb->s_dirt = 0; 1223 sb->s_dirt = 0;
1214 1224
1215 UFSD("EXIT\n"); 1225 UFSD("EXIT\n");
1216 unlock_kernel();
1217 unlock_super(sb); 1226 unlock_super(sb);
1227 unlock_ufs(sb);
1218 1228
1219 return 0; 1229 return 0;
1220} 1230}
@@ -1256,7 +1266,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1256 unsigned new_mount_opt, ufstype; 1266 unsigned new_mount_opt, ufstype;
1257 unsigned flags; 1267 unsigned flags;
1258 1268
1259 lock_kernel(); 1269 lock_ufs(sb);
1260 lock_super(sb); 1270 lock_super(sb);
1261 uspi = UFS_SB(sb)->s_uspi; 1271 uspi = UFS_SB(sb)->s_uspi;
1262 flags = UFS_SB(sb)->s_flags; 1272 flags = UFS_SB(sb)->s_flags;
@@ -1272,7 +1282,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1272 ufs_set_opt (new_mount_opt, ONERROR_LOCK); 1282 ufs_set_opt (new_mount_opt, ONERROR_LOCK);
1273 if (!ufs_parse_options (data, &new_mount_opt)) { 1283 if (!ufs_parse_options (data, &new_mount_opt)) {
1274 unlock_super(sb); 1284 unlock_super(sb);
1275 unlock_kernel(); 1285 unlock_ufs(sb);
1276 return -EINVAL; 1286 return -EINVAL;
1277 } 1287 }
1278 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { 1288 if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
@@ -1280,14 +1290,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1280 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { 1290 } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
1281 printk("ufstype can't be changed during remount\n"); 1291 printk("ufstype can't be changed during remount\n");
1282 unlock_super(sb); 1292 unlock_super(sb);
1283 unlock_kernel(); 1293 unlock_ufs(sb);
1284 return -EINVAL; 1294 return -EINVAL;
1285 } 1295 }
1286 1296
1287 if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { 1297 if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
1288 UFS_SB(sb)->s_mount_opt = new_mount_opt; 1298 UFS_SB(sb)->s_mount_opt = new_mount_opt;
1289 unlock_super(sb); 1299 unlock_super(sb);
1290 unlock_kernel(); 1300 unlock_ufs(sb);
1291 return 0; 1301 return 0;
1292 } 1302 }
1293 1303
@@ -1313,7 +1323,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1313 printk("ufs was compiled with read-only support, " 1323 printk("ufs was compiled with read-only support, "
1314 "can't be mounted as read-write\n"); 1324 "can't be mounted as read-write\n");
1315 unlock_super(sb); 1325 unlock_super(sb);
1316 unlock_kernel(); 1326 unlock_ufs(sb);
1317 return -EINVAL; 1327 return -EINVAL;
1318#else 1328#else
1319 if (ufstype != UFS_MOUNT_UFSTYPE_SUN && 1329 if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
@@ -1323,13 +1333,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1323 ufstype != UFS_MOUNT_UFSTYPE_UFS2) { 1333 ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
1324 printk("this ufstype is read-only supported\n"); 1334 printk("this ufstype is read-only supported\n");
1325 unlock_super(sb); 1335 unlock_super(sb);
1326 unlock_kernel(); 1336 unlock_ufs(sb);
1327 return -EINVAL; 1337 return -EINVAL;
1328 } 1338 }
1329 if (!ufs_read_cylinder_structures(sb)) { 1339 if (!ufs_read_cylinder_structures(sb)) {
1330 printk("failed during remounting\n"); 1340 printk("failed during remounting\n");
1331 unlock_super(sb); 1341 unlock_super(sb);
1332 unlock_kernel(); 1342 unlock_ufs(sb);
1333 return -EPERM; 1343 return -EPERM;
1334 } 1344 }
1335 sb->s_flags &= ~MS_RDONLY; 1345 sb->s_flags &= ~MS_RDONLY;
@@ -1337,7 +1347,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
1337 } 1347 }
1338 UFS_SB(sb)->s_mount_opt = new_mount_opt; 1348 UFS_SB(sb)->s_mount_opt = new_mount_opt;
1339 unlock_super(sb); 1349 unlock_super(sb);
1340 unlock_kernel(); 1350 unlock_ufs(sb);
1341 return 0; 1351 return 0;
1342} 1352}
1343 1353
@@ -1371,7 +1381,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1371 struct ufs_super_block_third *usb3; 1381 struct ufs_super_block_third *usb3;
1372 u64 id = huge_encode_dev(sb->s_bdev->bd_dev); 1382 u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
1373 1383
1374 lock_kernel(); 1384 lock_ufs(sb);
1375 1385
1376 usb1 = ubh_get_usb_first(uspi); 1386 usb1 = ubh_get_usb_first(uspi);
1377 usb2 = ubh_get_usb_second(uspi); 1387 usb2 = ubh_get_usb_second(uspi);
@@ -1395,7 +1405,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf)
1395 buf->f_fsid.val[0] = (u32)id; 1405 buf->f_fsid.val[0] = (u32)id;
1396 buf->f_fsid.val[1] = (u32)(id >> 32); 1406 buf->f_fsid.val[1] = (u32)(id >> 32);
1397 1407
1398 unlock_kernel(); 1408 unlock_ufs(sb);
1399 1409
1400 return 0; 1410 return 0;
1401} 1411}
@@ -1405,7 +1415,7 @@ static struct kmem_cache * ufs_inode_cachep;
1405static struct inode *ufs_alloc_inode(struct super_block *sb) 1415static struct inode *ufs_alloc_inode(struct super_block *sb)
1406{ 1416{
1407 struct ufs_inode_info *ei; 1417 struct ufs_inode_info *ei;
1408 ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL); 1418 ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS);
1409 if (!ei) 1419 if (!ei)
1410 return NULL; 1420 return NULL;
1411 ei->vfs_inode.i_version = 1; 1421 ei->vfs_inode.i_version = 1;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index a58f9155fc9a..e56a4f567212 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -40,7 +40,6 @@
40#include <linux/time.h> 40#include <linux/time.h>
41#include <linux/stat.h> 41#include <linux/stat.h>
42#include <linux/string.h> 42#include <linux/string.h>
43#include <linux/smp_lock.h>
44#include <linux/buffer_head.h> 43#include <linux/buffer_head.h>
45#include <linux/blkdev.h> 44#include <linux/blkdev.h>
46#include <linux/sched.h> 45#include <linux/sched.h>
@@ -467,7 +466,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
467 466
468 block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); 467 block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
469 468
470 lock_kernel();
471 while (1) { 469 while (1) {
472 retry = ufs_trunc_direct(inode); 470 retry = ufs_trunc_direct(inode);
473 retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, 471 retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
@@ -487,7 +485,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
487 485
488 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 486 inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
489 ufsi->i_lastfrag = DIRECT_FRAGMENT; 487 ufsi->i_lastfrag = DIRECT_FRAGMENT;
490 unlock_kernel();
491 mark_inode_dirty(inode); 488 mark_inode_dirty(inode);
492out: 489out:
493 UFSD("EXIT: err %d\n", err); 490 UFSD("EXIT: err %d\n", err);
@@ -510,7 +507,9 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
510 /* XXX(truncate): truncate_setsize should be called last */ 507 /* XXX(truncate): truncate_setsize should be called last */
511 truncate_setsize(inode, attr->ia_size); 508 truncate_setsize(inode, attr->ia_size);
512 509
510 lock_ufs(inode->i_sb);
513 error = ufs_truncate(inode, old_i_size); 511 error = ufs_truncate(inode, old_i_size);
512 unlock_ufs(inode->i_sb);
514 if (error) 513 if (error)
515 return error; 514 return error;
516 } 515 }
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index c08782e1b48a..5be2755dd715 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -18,6 +18,8 @@ struct ufs_sb_info {
18 unsigned s_cgno[UFS_MAX_GROUP_LOADED]; 18 unsigned s_cgno[UFS_MAX_GROUP_LOADED];
19 unsigned short s_cg_loaded; 19 unsigned short s_cg_loaded;
20 unsigned s_mount_opt; 20 unsigned s_mount_opt;
21 struct mutex mutex;
22 struct task_struct *mutex_owner;
21}; 23};
22 24
23struct ufs_inode_info { 25struct ufs_inode_info {
@@ -109,7 +111,6 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
109extern int ufs_write_inode (struct inode *, struct writeback_control *); 111extern int ufs_write_inode (struct inode *, struct writeback_control *);
110extern int ufs_sync_inode (struct inode *); 112extern int ufs_sync_inode (struct inode *);
111extern void ufs_evict_inode (struct inode *); 113extern void ufs_evict_inode (struct inode *);
112extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *);
113extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); 114extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create);
114 115
115/* namei.c */ 116/* namei.c */
@@ -154,4 +155,7 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b)
154 return do_div(b, uspi->s_fpg); 155 return do_div(b, uspi->s_fpg);
155} 156}
156 157
158extern void lock_ufs(struct super_block *sb);
159extern void unlock_ufs(struct super_block *sb);
160
157#endif /* _UFS_UFS_H */ 161#endif /* _UFS_UFS_H */
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index d2c36d53fe66..95425b59ce0a 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -27,7 +27,7 @@ struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi,
27 if (count > UFS_MAXFRAG) 27 if (count > UFS_MAXFRAG)
28 return NULL; 28 return NULL;
29 ubh = (struct ufs_buffer_head *) 29 ubh = (struct ufs_buffer_head *)
30 kmalloc (sizeof (struct ufs_buffer_head), GFP_KERNEL); 30 kmalloc (sizeof (struct ufs_buffer_head), GFP_NOFS);
31 if (!ubh) 31 if (!ubh)
32 return NULL; 32 return NULL;
33 ubh->fragment = fragment; 33 ubh->fragment = fragment;
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 9f8775ce381c..954175928240 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -408,7 +408,7 @@ static inline unsigned _ubh_find_next_zero_bit_(
408 for (;;) { 408 for (;;) {
409 count = min_t(unsigned int, size + offset, uspi->s_bpf); 409 count = min_t(unsigned int, size + offset, uspi->s_bpf);
410 size -= count - offset; 410 size -= count - offset;
411 pos = ext2_find_next_zero_bit (ubh->bh[base]->b_data, count, offset); 411 pos = find_next_zero_bit_le(ubh->bh[base]->b_data, count, offset);
412 if (pos < count || !size) 412 if (pos < count || !size)
413 break; 413 break;
414 base++; 414 base++;
diff --git a/fs/utimes.c b/fs/utimes.c
index 179b58690657..ba653f3dc1bc 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -95,7 +95,7 @@ static int utimes_common(struct path *path, struct timespec *times)
95 if (IS_IMMUTABLE(inode)) 95 if (IS_IMMUTABLE(inode))
96 goto mnt_drop_write_and_out; 96 goto mnt_drop_write_and_out;
97 97
98 if (!is_owner_or_cap(inode)) { 98 if (!inode_owner_or_capable(inode)) {
99 error = inode_permission(inode, MAY_WRITE); 99 error = inode_permission(inode, MAY_WRITE);
100 if (error) 100 if (error)
101 goto mnt_drop_write_and_out; 101 goto mnt_drop_write_and_out;
diff --git a/fs/xattr.c b/fs/xattr.c
index 01bb8135e14a..a19acdb81cd1 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -59,7 +59,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
59 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) 59 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
60 return -EPERM; 60 return -EPERM;
61 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && 61 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
62 (mask & MAY_WRITE) && !is_owner_or_cap(inode)) 62 (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
63 return -EPERM; 63 return -EPERM;
64 } 64 }
65 65
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index faca44997099..284a7c89697e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,14 +16,11 @@
16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17# 17#
18 18
19EXTRA_CFLAGS += -I$(src) -I$(src)/linux-2.6 19ccflags-y := -I$(src) -I$(src)/linux-2.6
20ccflags-$(CONFIG_XFS_DEBUG) += -g
20 21
21XFS_LINUX := linux-2.6 22XFS_LINUX := linux-2.6
22 23
23ifeq ($(CONFIG_XFS_DEBUG),y)
24 EXTRA_CFLAGS += -g
25endif
26
27obj-$(CONFIG_XFS_FS) += xfs.o 24obj-$(CONFIG_XFS_FS) += xfs.o
28 25
29xfs-y += linux-2.6/xfs_trace.o 26xfs-y += linux-2.6/xfs_trace.o
@@ -105,11 +102,10 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
105 xfs_globals.o \ 102 xfs_globals.o \
106 xfs_ioctl.o \ 103 xfs_ioctl.o \
107 xfs_iops.o \ 104 xfs_iops.o \
105 xfs_message.o \
108 xfs_super.o \ 106 xfs_super.o \
109 xfs_sync.o \ 107 xfs_sync.o \
110 xfs_xattr.o) 108 xfs_xattr.o)
111 109
112# Objects in support/ 110# Objects in support/
113xfs-y += $(addprefix support/, \ 111xfs-y += support/uuid.o
114 debug.o \
115 uuid.o)
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 666c9db48eb6..a907de565db3 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -23,6 +23,7 @@
23#include <linux/backing-dev.h> 23#include <linux/backing-dev.h>
24#include "time.h" 24#include "time.h"
25#include "kmem.h" 25#include "kmem.h"
26#include "xfs_message.h"
26 27
27/* 28/*
28 * Greedy allocation. May fail and may return vmalloced memory. 29 * Greedy allocation. May fail and may return vmalloced memory.
@@ -56,8 +57,8 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
56 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) 57 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
57 return ptr; 58 return ptr;
58 if (!(++retries % 100)) 59 if (!(++retries % 100))
59 printk(KERN_ERR "XFS: possible memory allocation " 60 xfs_err(NULL,
60 "deadlock in %s (mode:0x%x)\n", 61 "possible memory allocation deadlock in %s (mode:0x%x)",
61 __func__, lflags); 62 __func__, lflags);
62 congestion_wait(BLK_RW_ASYNC, HZ/50); 63 congestion_wait(BLK_RW_ASYNC, HZ/50);
63 } while (1); 64 } while (1);
@@ -112,8 +113,8 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
112 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) 113 if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
113 return ptr; 114 return ptr;
114 if (!(++retries % 100)) 115 if (!(++retries % 100))
115 printk(KERN_ERR "XFS: possible memory allocation " 116 xfs_err(NULL,
116 "deadlock in %s (mode:0x%x)\n", 117 "possible memory allocation deadlock in %s (mode:0x%x)",
117 __func__, lflags); 118 __func__, lflags);
118 congestion_wait(BLK_RW_ASYNC, HZ/50); 119 congestion_wait(BLK_RW_ASYNC, HZ/50);
119 } while (1); 120 } while (1);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index ec7bbb5645b6..8c5c87277456 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -854,7 +854,7 @@ xfs_aops_discard_page(
854 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 854 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
855 goto out_invalidate; 855 goto out_invalidate;
856 856
857 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 857 xfs_alert(ip->i_mount,
858 "page discard on page %p, inode 0x%llx, offset %llu.", 858 "page discard on page %p, inode 0x%llx, offset %llu.",
859 page, ip->i_ino, offset); 859 page, ip->i_ino, offset);
860 860
@@ -872,7 +872,7 @@ xfs_aops_discard_page(
872 if (error) { 872 if (error) {
873 /* something screwed, just bail */ 873 /* something screwed, just bail */
874 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 874 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
875 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 875 xfs_alert(ip->i_mount,
876 "page discard unable to remove delalloc mapping."); 876 "page discard unable to remove delalloc mapping.");
877 } 877 }
878 break; 878 break;
@@ -1411,7 +1411,7 @@ xfs_vm_write_failed(
1411 if (error) { 1411 if (error) {
1412 /* something screwed, just bail */ 1412 /* something screwed, just bail */
1413 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1413 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
1414 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 1414 xfs_alert(ip->i_mount,
1415 "xfs_vm_write_failed: unable to clean up ino %lld", 1415 "xfs_vm_write_failed: unable to clean up ino %lld",
1416 ip->i_ino); 1416 ip->i_ino);
1417 } 1417 }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ac1c7e8378dd..5cb230f2cb4f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -401,9 +401,8 @@ _xfs_buf_lookup_pages(
401 * handle buffer allocation failures we can't do much. 401 * handle buffer allocation failures we can't do much.
402 */ 402 */
403 if (!(++retries % 100)) 403 if (!(++retries % 100))
404 printk(KERN_ERR 404 xfs_err(NULL,
405 "XFS: possible memory allocation " 405 "possible memory allocation deadlock in %s (mode:0x%x)",
406 "deadlock in %s (mode:0x%x)\n",
407 __func__, gfp_mask); 406 __func__, gfp_mask);
408 407
409 XFS_STATS_INC(xb_page_retries); 408 XFS_STATS_INC(xb_page_retries);
@@ -615,8 +614,8 @@ xfs_buf_get(
615 if (!(bp->b_flags & XBF_MAPPED)) { 614 if (!(bp->b_flags & XBF_MAPPED)) {
616 error = _xfs_buf_map_pages(bp, flags); 615 error = _xfs_buf_map_pages(bp, flags);
617 if (unlikely(error)) { 616 if (unlikely(error)) {
618 printk(KERN_WARNING "%s: failed to map pages\n", 617 xfs_warn(target->bt_mount,
619 __func__); 618 "%s: failed to map pages\n", __func__);
620 goto no_buffer; 619 goto no_buffer;
621 } 620 }
622 } 621 }
@@ -850,8 +849,8 @@ xfs_buf_get_uncached(
850 849
851 error = _xfs_buf_map_pages(bp, XBF_MAPPED); 850 error = _xfs_buf_map_pages(bp, XBF_MAPPED);
852 if (unlikely(error)) { 851 if (unlikely(error)) {
853 printk(KERN_WARNING "%s: failed to map pages\n", 852 xfs_warn(target->bt_mount,
854 __func__); 853 "%s: failed to map pages\n", __func__);
855 goto fail_free_mem; 854 goto fail_free_mem;
856 } 855 }
857 856
@@ -1617,8 +1616,8 @@ xfs_setsize_buftarg_flags(
1617 btp->bt_smask = sectorsize - 1; 1616 btp->bt_smask = sectorsize - 1;
1618 1617
1619 if (set_blocksize(btp->bt_bdev, sectorsize)) { 1618 if (set_blocksize(btp->bt_bdev, sectorsize)) {
1620 printk(KERN_WARNING 1619 xfs_warn(btp->bt_mount,
1621 "XFS: Cannot set_blocksize to %u on device %s\n", 1620 "Cannot set_blocksize to %u on device %s\n",
1622 sectorsize, XFS_BUFTARG_NAME(btp)); 1621 sectorsize, XFS_BUFTARG_NAME(btp));
1623 return EINVAL; 1622 return EINVAL;
1624 } 1623 }
@@ -2022,11 +2021,12 @@ xfs_buf_init(void)
2022 if (!xfslogd_workqueue) 2021 if (!xfslogd_workqueue)
2023 goto out_free_buf_zone; 2022 goto out_free_buf_zone;
2024 2023
2025 xfsdatad_workqueue = create_workqueue("xfsdatad"); 2024 xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
2026 if (!xfsdatad_workqueue) 2025 if (!xfsdatad_workqueue)
2027 goto out_destroy_xfslogd_workqueue; 2026 goto out_destroy_xfslogd_workqueue;
2028 2027
2029 xfsconvertd_workqueue = create_workqueue("xfsconvertd"); 2028 xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
2029 WQ_MEM_RECLAIM, 1);
2030 if (!xfsconvertd_workqueue) 2030 if (!xfsconvertd_workqueue)
2031 goto out_destroy_xfsdatad_workqueue; 2031 goto out_destroy_xfsdatad_workqueue;
2032 2032
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index fc0114da7fdd..f4f878fc0083 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -89,8 +89,10 @@ xfs_fs_encode_fh(
89 * seven combinations work. The real answer is "don't use v2". 89 * seven combinations work. The real answer is "don't use v2".
90 */ 90 */
91 len = xfs_fileid_length(fileid_type); 91 len = xfs_fileid_length(fileid_type);
92 if (*max_len < len) 92 if (*max_len < len) {
93 *max_len = len;
93 return 255; 94 return 255;
95 }
94 *max_len = len; 96 *max_len = len;
95 97
96 switch (fileid_type) { 98 switch (fileid_type) {
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f5e2a19e0f8e..0ca0e3c024d7 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -695,14 +695,19 @@ xfs_ioc_fsgeometry_v1(
695 xfs_mount_t *mp, 695 xfs_mount_t *mp,
696 void __user *arg) 696 void __user *arg)
697{ 697{
698 xfs_fsop_geom_v1_t fsgeo; 698 xfs_fsop_geom_t fsgeo;
699 int error; 699 int error;
700 700
701 error = xfs_fs_geometry(mp, (xfs_fsop_geom_t *)&fsgeo, 3); 701 error = xfs_fs_geometry(mp, &fsgeo, 3);
702 if (error) 702 if (error)
703 return -error; 703 return -error;
704 704
705 if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) 705 /*
706 * Caller should have passed an argument of type
707 * xfs_fsop_geom_v1_t. This is a proper subset of the
708 * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
709 */
710 if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
706 return -XFS_ERROR(EFAULT); 711 return -XFS_ERROR(EFAULT);
707 return 0; 712 return 0;
708} 713}
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index bd5727852fd6..9ff7fc603d2f 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -102,7 +102,8 @@ xfs_mark_inode_dirty(
102STATIC int 102STATIC int
103xfs_init_security( 103xfs_init_security(
104 struct inode *inode, 104 struct inode *inode,
105 struct inode *dir) 105 struct inode *dir,
106 const struct qstr *qstr)
106{ 107{
107 struct xfs_inode *ip = XFS_I(inode); 108 struct xfs_inode *ip = XFS_I(inode);
108 size_t length; 109 size_t length;
@@ -110,7 +111,7 @@ xfs_init_security(
110 unsigned char *name; 111 unsigned char *name;
111 int error; 112 int error;
112 113
113 error = security_inode_init_security(inode, dir, (char **)&name, 114 error = security_inode_init_security(inode, dir, qstr, (char **)&name,
114 &value, &length); 115 &value, &length);
115 if (error) { 116 if (error) {
116 if (error == -EOPNOTSUPP) 117 if (error == -EOPNOTSUPP)
@@ -194,7 +195,7 @@ xfs_vn_mknod(
194 195
195 inode = VFS_I(ip); 196 inode = VFS_I(ip);
196 197
197 error = xfs_init_security(inode, dir); 198 error = xfs_init_security(inode, dir, &dentry->d_name);
198 if (unlikely(error)) 199 if (unlikely(error))
199 goto out_cleanup_inode; 200 goto out_cleanup_inode;
200 201
@@ -367,7 +368,7 @@ xfs_vn_symlink(
367 368
368 inode = VFS_I(cip); 369 inode = VFS_I(cip);
369 370
370 error = xfs_init_security(inode, dir); 371 error = xfs_init_security(inode, dir, &dentry->d_name);
371 if (unlikely(error)) 372 if (unlikely(error))
372 goto out_cleanup_inode; 373 goto out_cleanup_inode;
373 374
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 096494997747..244be9cbfe78 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -39,7 +39,6 @@
39#include <mrlock.h> 39#include <mrlock.h>
40#include <time.h> 40#include <time.h>
41 41
42#include <support/debug.h>
43#include <support/uuid.h> 42#include <support/uuid.h>
44 43
45#include <linux/semaphore.h> 44#include <linux/semaphore.h>
@@ -86,6 +85,7 @@
86#include <xfs_aops.h> 85#include <xfs_aops.h>
87#include <xfs_super.h> 86#include <xfs_super.h>
88#include <xfs_buf.h> 87#include <xfs_buf.h>
88#include <xfs_message.h>
89 89
90/* 90/*
91 * Feature macros (disable/enable) 91 * Feature macros (disable/enable)
@@ -280,4 +280,25 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
280#define __arch_pack 280#define __arch_pack
281#endif 281#endif
282 282
283#define ASSERT_ALWAYS(expr) \
284 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
285
286#ifndef DEBUG
287#define ASSERT(expr) ((void)0)
288
289#ifndef STATIC
290# define STATIC static noinline
291#endif
292
293#else /* DEBUG */
294
295#define ASSERT(expr) \
296 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
297
298#ifndef STATIC
299# define STATIC noinline
300#endif
301
302#endif /* DEBUG */
303
283#endif /* __XFS_LINUX__ */ 304#endif /* __XFS_LINUX__ */
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
new file mode 100644
index 000000000000..508e06fd7d1e
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.c
@@ -0,0 +1,133 @@
1/*
2 * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write the Free Software Foundation,
15 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18#include "xfs.h"
19#include "xfs_fs.h"
20#include "xfs_types.h"
21#include "xfs_log.h"
22#include "xfs_inum.h"
23#include "xfs_trans.h"
24#include "xfs_sb.h"
25#include "xfs_ag.h"
26#include "xfs_mount.h"
27
28/*
29 * XFS logging functions
30 */
31static int
32__xfs_printk(
33 const char *level,
34 const struct xfs_mount *mp,
35 struct va_format *vaf)
36{
37 if (mp && mp->m_fsname)
38 return printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
39 return printk("%sXFS: %pV\n", level, vaf);
40}
41
42int xfs_printk(
43 const char *level,
44 const struct xfs_mount *mp,
45 const char *fmt, ...)
46{
47 struct va_format vaf;
48 va_list args;
49 int r;
50
51 va_start(args, fmt);
52
53 vaf.fmt = fmt;
54 vaf.va = &args;
55
56 r = __xfs_printk(level, mp, &vaf);
57 va_end(args);
58
59 return r;
60}
61
62#define define_xfs_printk_level(func, kern_level) \
63int func(const struct xfs_mount *mp, const char *fmt, ...) \
64{ \
65 struct va_format vaf; \
66 va_list args; \
67 int r; \
68 \
69 va_start(args, fmt); \
70 \
71 vaf.fmt = fmt; \
72 vaf.va = &args; \
73 \
74 r = __xfs_printk(kern_level, mp, &vaf); \
75 va_end(args); \
76 \
77 return r; \
78} \
79
80define_xfs_printk_level(xfs_emerg, KERN_EMERG);
81define_xfs_printk_level(xfs_alert, KERN_ALERT);
82define_xfs_printk_level(xfs_crit, KERN_CRIT);
83define_xfs_printk_level(xfs_err, KERN_ERR);
84define_xfs_printk_level(xfs_warn, KERN_WARNING);
85define_xfs_printk_level(xfs_notice, KERN_NOTICE);
86define_xfs_printk_level(xfs_info, KERN_INFO);
87#ifdef DEBUG
88define_xfs_printk_level(xfs_debug, KERN_DEBUG);
89#endif
90
91int
92xfs_alert_tag(
93 const struct xfs_mount *mp,
94 int panic_tag,
95 const char *fmt, ...)
96{
97 struct va_format vaf;
98 va_list args;
99 int do_panic = 0;
100 int r;
101
102 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
103 xfs_printk(KERN_ALERT, mp,
104 "XFS: Transforming an alert into a BUG.");
105 do_panic = 1;
106 }
107
108 va_start(args, fmt);
109
110 vaf.fmt = fmt;
111 vaf.va = &args;
112
113 r = __xfs_printk(KERN_ALERT, mp, &vaf);
114 va_end(args);
115
116 BUG_ON(do_panic);
117
118 return r;
119}
120
121void
122assfail(char *expr, char *file, int line)
123{
124 xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
125 expr, file, line);
126 BUG();
127}
128
129void
130xfs_hex_dump(void *p, int length)
131{
132 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
133}
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
new file mode 100644
index 000000000000..e77ffa16745b
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_message.h
@@ -0,0 +1,38 @@
1#ifndef __XFS_MESSAGE_H
2#define __XFS_MESSAGE_H 1
3
4struct xfs_mount;
5
6extern int xfs_printk(const char *level, const struct xfs_mount *mp,
7 const char *fmt, ...)
8 __attribute__ ((format (printf, 3, 4)));
9extern int xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
10 __attribute__ ((format (printf, 2, 3)));
11extern int xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
12 __attribute__ ((format (printf, 2, 3)));
13extern int xfs_alert_tag(const struct xfs_mount *mp, int tag,
14 const char *fmt, ...)
15 __attribute__ ((format (printf, 3, 4)));
16extern int xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
17 __attribute__ ((format (printf, 2, 3)));
18extern int xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
19 __attribute__ ((format (printf, 2, 3)));
20extern int xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
21 __attribute__ ((format (printf, 2, 3)));
22extern int xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
23 __attribute__ ((format (printf, 2, 3)));
24extern int xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
25 __attribute__ ((format (printf, 2, 3)));
26
27#ifdef DEBUG
28extern int xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
29 __attribute__ ((format (printf, 2, 3)));
30#else
31#define xfs_debug(mp, fmt, ...) (0)
32#endif
33
34extern void assfail(char *expr, char *f, int l);
35
36extern void xfs_hex_dump(void *p, int length);
37
38#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9731898083ae..818c4cf2de86 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -173,6 +173,15 @@ xfs_parseargs(
173 __uint8_t iosizelog = 0; 173 __uint8_t iosizelog = 0;
174 174
175 /* 175 /*
176 * set up the mount name first so all the errors will refer to the
177 * correct device.
178 */
179 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
180 if (!mp->m_fsname)
181 return ENOMEM;
182 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
183
184 /*
176 * Copy binary VFS mount flags we are interested in. 185 * Copy binary VFS mount flags we are interested in.
177 */ 186 */
178 if (sb->s_flags & MS_RDONLY) 187 if (sb->s_flags & MS_RDONLY)
@@ -189,6 +198,7 @@ xfs_parseargs(
189 mp->m_flags |= XFS_MOUNT_BARRIER; 198 mp->m_flags |= XFS_MOUNT_BARRIER;
190 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; 199 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
191 mp->m_flags |= XFS_MOUNT_SMALL_INUMS; 200 mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
201 mp->m_flags |= XFS_MOUNT_DELAYLOG;
192 202
193 /* 203 /*
194 * These can be overridden by the mount option parsing. 204 * These can be overridden by the mount option parsing.
@@ -207,24 +217,21 @@ xfs_parseargs(
207 217
208 if (!strcmp(this_char, MNTOPT_LOGBUFS)) { 218 if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
209 if (!value || !*value) { 219 if (!value || !*value) {
210 cmn_err(CE_WARN, 220 xfs_warn(mp, "%s option requires an argument",
211 "XFS: %s option requires an argument",
212 this_char); 221 this_char);
213 return EINVAL; 222 return EINVAL;
214 } 223 }
215 mp->m_logbufs = simple_strtoul(value, &eov, 10); 224 mp->m_logbufs = simple_strtoul(value, &eov, 10);
216 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { 225 } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
217 if (!value || !*value) { 226 if (!value || !*value) {
218 cmn_err(CE_WARN, 227 xfs_warn(mp, "%s option requires an argument",
219 "XFS: %s option requires an argument",
220 this_char); 228 this_char);
221 return EINVAL; 229 return EINVAL;
222 } 230 }
223 mp->m_logbsize = suffix_strtoul(value, &eov, 10); 231 mp->m_logbsize = suffix_strtoul(value, &eov, 10);
224 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { 232 } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
225 if (!value || !*value) { 233 if (!value || !*value) {
226 cmn_err(CE_WARN, 234 xfs_warn(mp, "%s option requires an argument",
227 "XFS: %s option requires an argument",
228 this_char); 235 this_char);
229 return EINVAL; 236 return EINVAL;
230 } 237 }
@@ -232,14 +239,12 @@ xfs_parseargs(
232 if (!mp->m_logname) 239 if (!mp->m_logname)
233 return ENOMEM; 240 return ENOMEM;
234 } else if (!strcmp(this_char, MNTOPT_MTPT)) { 241 } else if (!strcmp(this_char, MNTOPT_MTPT)) {
235 cmn_err(CE_WARN, 242 xfs_warn(mp, "%s option not allowed on this system",
236 "XFS: %s option not allowed on this system",
237 this_char); 243 this_char);
238 return EINVAL; 244 return EINVAL;
239 } else if (!strcmp(this_char, MNTOPT_RTDEV)) { 245 } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
240 if (!value || !*value) { 246 if (!value || !*value) {
241 cmn_err(CE_WARN, 247 xfs_warn(mp, "%s option requires an argument",
242 "XFS: %s option requires an argument",
243 this_char); 248 this_char);
244 return EINVAL; 249 return EINVAL;
245 } 250 }
@@ -248,8 +253,7 @@ xfs_parseargs(
248 return ENOMEM; 253 return ENOMEM;
249 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { 254 } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
250 if (!value || !*value) { 255 if (!value || !*value) {
251 cmn_err(CE_WARN, 256 xfs_warn(mp, "%s option requires an argument",
252 "XFS: %s option requires an argument",
253 this_char); 257 this_char);
254 return EINVAL; 258 return EINVAL;
255 } 259 }
@@ -257,8 +261,7 @@ xfs_parseargs(
257 iosizelog = ffs(iosize) - 1; 261 iosizelog = ffs(iosize) - 1;
258 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { 262 } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
259 if (!value || !*value) { 263 if (!value || !*value) {
260 cmn_err(CE_WARN, 264 xfs_warn(mp, "%s option requires an argument",
261 "XFS: %s option requires an argument",
262 this_char); 265 this_char);
263 return EINVAL; 266 return EINVAL;
264 } 267 }
@@ -280,16 +283,14 @@ xfs_parseargs(
280 mp->m_flags |= XFS_MOUNT_SWALLOC; 283 mp->m_flags |= XFS_MOUNT_SWALLOC;
281 } else if (!strcmp(this_char, MNTOPT_SUNIT)) { 284 } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
282 if (!value || !*value) { 285 if (!value || !*value) {
283 cmn_err(CE_WARN, 286 xfs_warn(mp, "%s option requires an argument",
284 "XFS: %s option requires an argument",
285 this_char); 287 this_char);
286 return EINVAL; 288 return EINVAL;
287 } 289 }
288 dsunit = simple_strtoul(value, &eov, 10); 290 dsunit = simple_strtoul(value, &eov, 10);
289 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { 291 } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
290 if (!value || !*value) { 292 if (!value || !*value) {
291 cmn_err(CE_WARN, 293 xfs_warn(mp, "%s option requires an argument",
292 "XFS: %s option requires an argument",
293 this_char); 294 this_char);
294 return EINVAL; 295 return EINVAL;
295 } 296 }
@@ -297,8 +298,7 @@ xfs_parseargs(
297 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { 298 } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
298 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; 299 mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
299#if !XFS_BIG_INUMS 300#if !XFS_BIG_INUMS
300 cmn_err(CE_WARN, 301 xfs_warn(mp, "%s option not allowed on this system",
301 "XFS: %s option not allowed on this system",
302 this_char); 302 this_char);
303 return EINVAL; 303 return EINVAL;
304#endif 304#endif
@@ -356,20 +356,19 @@ xfs_parseargs(
356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { 356 } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG; 357 mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
358 } else if (!strcmp(this_char, "ihashsize")) { 358 } else if (!strcmp(this_char, "ihashsize")) {
359 cmn_err(CE_WARN, 359 xfs_warn(mp,
360 "XFS: ihashsize no longer used, option is deprecated."); 360 "ihashsize no longer used, option is deprecated.");
361 } else if (!strcmp(this_char, "osyncisdsync")) { 361 } else if (!strcmp(this_char, "osyncisdsync")) {
362 cmn_err(CE_WARN, 362 xfs_warn(mp,
363 "XFS: osyncisdsync has no effect, option is deprecated."); 363 "osyncisdsync has no effect, option is deprecated.");
364 } else if (!strcmp(this_char, "osyncisosync")) { 364 } else if (!strcmp(this_char, "osyncisosync")) {
365 cmn_err(CE_WARN, 365 xfs_warn(mp,
366 "XFS: osyncisosync has no effect, option is deprecated."); 366 "osyncisosync has no effect, option is deprecated.");
367 } else if (!strcmp(this_char, "irixsgid")) { 367 } else if (!strcmp(this_char, "irixsgid")) {
368 cmn_err(CE_WARN, 368 xfs_warn(mp,
369 "XFS: irixsgid is now a sysctl(2) variable, option is deprecated."); 369 "irixsgid is now a sysctl(2) variable, option is deprecated.");
370 } else { 370 } else {
371 cmn_err(CE_WARN, 371 xfs_warn(mp, "unknown mount option [%s].", this_char);
372 "XFS: unknown mount option [%s].", this_char);
373 return EINVAL; 372 return EINVAL;
374 } 373 }
375 } 374 }
@@ -379,40 +378,37 @@ xfs_parseargs(
379 */ 378 */
380 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && 379 if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
381 !(mp->m_flags & XFS_MOUNT_RDONLY)) { 380 !(mp->m_flags & XFS_MOUNT_RDONLY)) {
382 cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); 381 xfs_warn(mp, "no-recovery mounts must be read-only.");
383 return EINVAL; 382 return EINVAL;
384 } 383 }
385 384
386 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { 385 if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
387 cmn_err(CE_WARN, 386 xfs_warn(mp,
388 "XFS: sunit and swidth options incompatible with the noalign option"); 387 "sunit and swidth options incompatible with the noalign option");
389 return EINVAL; 388 return EINVAL;
390 } 389 }
391 390
392#ifndef CONFIG_XFS_QUOTA 391#ifndef CONFIG_XFS_QUOTA
393 if (XFS_IS_QUOTA_RUNNING(mp)) { 392 if (XFS_IS_QUOTA_RUNNING(mp)) {
394 cmn_err(CE_WARN, 393 xfs_warn(mp, "quota support not available in this kernel.");
395 "XFS: quota support not available in this kernel.");
396 return EINVAL; 394 return EINVAL;
397 } 395 }
398#endif 396#endif
399 397
400 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && 398 if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
401 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { 399 (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
402 cmn_err(CE_WARN, 400 xfs_warn(mp, "cannot mount with both project and group quota");
403 "XFS: cannot mount with both project and group quota");
404 return EINVAL; 401 return EINVAL;
405 } 402 }
406 403
407 if ((dsunit && !dswidth) || (!dsunit && dswidth)) { 404 if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
408 cmn_err(CE_WARN, 405 xfs_warn(mp, "sunit and swidth must be specified together");
409 "XFS: sunit and swidth must be specified together");
410 return EINVAL; 406 return EINVAL;
411 } 407 }
412 408
413 if (dsunit && (dswidth % dsunit != 0)) { 409 if (dsunit && (dswidth % dsunit != 0)) {
414 cmn_err(CE_WARN, 410 xfs_warn(mp,
415 "XFS: stripe width (%d) must be a multiple of the stripe unit (%d)", 411 "stripe width (%d) must be a multiple of the stripe unit (%d)",
416 dswidth, dsunit); 412 dswidth, dsunit);
417 return EINVAL; 413 return EINVAL;
418 } 414 }
@@ -438,8 +434,7 @@ done:
438 mp->m_logbufs != 0 && 434 mp->m_logbufs != 0 &&
439 (mp->m_logbufs < XLOG_MIN_ICLOGS || 435 (mp->m_logbufs < XLOG_MIN_ICLOGS ||
440 mp->m_logbufs > XLOG_MAX_ICLOGS)) { 436 mp->m_logbufs > XLOG_MAX_ICLOGS)) {
441 cmn_err(CE_WARN, 437 xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
442 "XFS: invalid logbufs value: %d [not %d-%d]",
443 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); 438 mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
444 return XFS_ERROR(EINVAL); 439 return XFS_ERROR(EINVAL);
445 } 440 }
@@ -448,22 +443,16 @@ done:
448 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || 443 (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
449 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || 444 mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
450 !is_power_of_2(mp->m_logbsize))) { 445 !is_power_of_2(mp->m_logbsize))) {
451 cmn_err(CE_WARN, 446 xfs_warn(mp,
452 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", 447 "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
453 mp->m_logbsize); 448 mp->m_logbsize);
454 return XFS_ERROR(EINVAL); 449 return XFS_ERROR(EINVAL);
455 } 450 }
456 451
457 mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
458 if (!mp->m_fsname)
459 return ENOMEM;
460 mp->m_fsname_len = strlen(mp->m_fsname) + 1;
461
462 if (iosizelog) { 452 if (iosizelog) {
463 if (iosizelog > XFS_MAX_IO_LOG || 453 if (iosizelog > XFS_MAX_IO_LOG ||
464 iosizelog < XFS_MIN_IO_LOG) { 454 iosizelog < XFS_MIN_IO_LOG) {
465 cmn_err(CE_WARN, 455 xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
466 "XFS: invalid log iosize: %d [not %d-%d]",
467 iosizelog, XFS_MIN_IO_LOG, 456 iosizelog, XFS_MIN_IO_LOG,
468 XFS_MAX_IO_LOG); 457 XFS_MAX_IO_LOG);
469 return XFS_ERROR(EINVAL); 458 return XFS_ERROR(EINVAL);
@@ -610,7 +599,7 @@ xfs_blkdev_get(
610 mp); 599 mp);
611 if (IS_ERR(*bdevp)) { 600 if (IS_ERR(*bdevp)) {
612 error = PTR_ERR(*bdevp); 601 error = PTR_ERR(*bdevp);
613 printk("XFS: Invalid device [%s], error=%d\n", name, error); 602 xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
614 } 603 }
615 604
616 return -error; 605 return -error;
@@ -664,23 +653,23 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
664 int error; 653 int error;
665 654
666 if (mp->m_logdev_targp != mp->m_ddev_targp) { 655 if (mp->m_logdev_targp != mp->m_ddev_targp) {
667 xfs_fs_cmn_err(CE_NOTE, mp, 656 xfs_notice(mp,
668 "Disabling barriers, not supported with external log device"); 657 "Disabling barriers, not supported with external log device");
669 mp->m_flags &= ~XFS_MOUNT_BARRIER; 658 mp->m_flags &= ~XFS_MOUNT_BARRIER;
670 return; 659 return;
671 } 660 }
672 661
673 if (xfs_readonly_buftarg(mp->m_ddev_targp)) { 662 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
674 xfs_fs_cmn_err(CE_NOTE, mp, 663 xfs_notice(mp,
675 "Disabling barriers, underlying device is readonly"); 664 "Disabling barriers, underlying device is readonly");
676 mp->m_flags &= ~XFS_MOUNT_BARRIER; 665 mp->m_flags &= ~XFS_MOUNT_BARRIER;
677 return; 666 return;
678 } 667 }
679 668
680 error = xfs_barrier_test(mp); 669 error = xfs_barrier_test(mp);
681 if (error) { 670 if (error) {
682 xfs_fs_cmn_err(CE_NOTE, mp, 671 xfs_notice(mp,
683 "Disabling barriers, trial barrier write failed"); 672 "Disabling barriers, trial barrier write failed");
684 mp->m_flags &= ~XFS_MOUNT_BARRIER; 673 mp->m_flags &= ~XFS_MOUNT_BARRIER;
685 return; 674 return;
686 } 675 }
@@ -743,8 +732,8 @@ xfs_open_devices(
743 goto out_close_logdev; 732 goto out_close_logdev;
744 733
745 if (rtdev == ddev || rtdev == logdev) { 734 if (rtdev == ddev || rtdev == logdev) {
746 cmn_err(CE_WARN, 735 xfs_warn(mp,
747 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); 736 "Cannot mount filesystem with identical rtdev and ddev/logdev.");
748 error = EINVAL; 737 error = EINVAL;
749 goto out_close_rtdev; 738 goto out_close_rtdev;
750 } 739 }
@@ -1345,8 +1334,8 @@ xfs_fs_remount(
1345 * options that we can't actually change. 1334 * options that we can't actually change.
1346 */ 1335 */
1347#if 0 1336#if 0
1348 printk(KERN_INFO 1337 xfs_info(mp,
1349 "XFS: mount option \"%s\" not supported for remount\n", p); 1338 "mount option \"%s\" not supported for remount\n", p);
1350 return -EINVAL; 1339 return -EINVAL;
1351#else 1340#else
1352 break; 1341 break;
@@ -1367,8 +1356,7 @@ xfs_fs_remount(
1367 if (mp->m_update_flags) { 1356 if (mp->m_update_flags) {
1368 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1357 error = xfs_mount_log_sb(mp, mp->m_update_flags);
1369 if (error) { 1358 if (error) {
1370 cmn_err(CE_WARN, 1359 xfs_warn(mp, "failed to write sb changes");
1371 "XFS: failed to write sb changes");
1372 return error; 1360 return error;
1373 } 1361 }
1374 mp->m_update_flags = 0; 1362 mp->m_update_flags = 0;
@@ -1452,15 +1440,15 @@ xfs_finish_flags(
1452 mp->m_logbsize = mp->m_sb.sb_logsunit; 1440 mp->m_logbsize = mp->m_sb.sb_logsunit;
1453 } else if (mp->m_logbsize > 0 && 1441 } else if (mp->m_logbsize > 0 &&
1454 mp->m_logbsize < mp->m_sb.sb_logsunit) { 1442 mp->m_logbsize < mp->m_sb.sb_logsunit) {
1455 cmn_err(CE_WARN, 1443 xfs_warn(mp,
1456 "XFS: logbuf size must be greater than or equal to log stripe size"); 1444 "logbuf size must be greater than or equal to log stripe size");
1457 return XFS_ERROR(EINVAL); 1445 return XFS_ERROR(EINVAL);
1458 } 1446 }
1459 } else { 1447 } else {
1460 /* Fail a mount if the logbuf is larger than 32K */ 1448 /* Fail a mount if the logbuf is larger than 32K */
1461 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { 1449 if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1462 cmn_err(CE_WARN, 1450 xfs_warn(mp,
1463 "XFS: logbuf size for version 1 logs must be 16K or 32K"); 1451 "logbuf size for version 1 logs must be 16K or 32K");
1464 return XFS_ERROR(EINVAL); 1452 return XFS_ERROR(EINVAL);
1465 } 1453 }
1466 } 1454 }
@@ -1477,8 +1465,8 @@ xfs_finish_flags(
1477 * prohibit r/w mounts of read-only filesystems 1465 * prohibit r/w mounts of read-only filesystems
1478 */ 1466 */
1479 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { 1467 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1480 cmn_err(CE_WARN, 1468 xfs_warn(mp,
1481 "XFS: cannot mount a read-only filesystem as read-write"); 1469 "cannot mount a read-only filesystem as read-write");
1482 return XFS_ERROR(EROFS); 1470 return XFS_ERROR(EROFS);
1483 } 1471 }
1484 1472
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index e22f0057d21f..6c10f1d2e3d3 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -425,8 +425,7 @@ xfs_quiesce_attr(
425 /* Push the superblock and write an unmount record */ 425 /* Push the superblock and write an unmount record */
426 error = xfs_log_sbcount(mp, 1); 426 error = xfs_log_sbcount(mp, 1);
427 if (error) 427 if (error)
428 xfs_fs_cmn_err(CE_WARN, mp, 428 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
429 "xfs_attr_quiesce: failed to log sb changes. "
430 "Frozen image may not be consistent."); 429 "Frozen image may not be consistent.");
431 xfs_log_unmount_write(mp); 430 xfs_log_unmount_write(mp);
432 xfs_unmountfs_writesb(mp); 431 xfs_unmountfs_writesb(mp);
@@ -806,7 +805,7 @@ xfs_reclaim_inode(
806 * pass on the error. 805 * pass on the error.
807 */ 806 */
808 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { 807 if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
809 xfs_fs_cmn_err(CE_WARN, ip->i_mount, 808 xfs_warn(ip->i_mount,
810 "inode 0x%llx background reclaim flush failed with %d", 809 "inode 0x%llx background reclaim flush failed with %d",
811 (long long)ip->i_ino, error); 810 (long long)ip->i_ino, error);
812 } 811 }
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index ee3cee097e7e..ee2d2adaa438 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -37,7 +37,7 @@ xfs_stats_clear_proc_handler(
37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); 37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
38 38
39 if (!ret && write && *valp) { 39 if (!ret && write && *valp) {
40 printk("XFS Clearing xfsstats\n"); 40 xfs_notice(NULL, "Clearing xfsstats");
41 for_each_possible_cpu(c) { 41 for_each_possible_cpu(c) {
42 preempt_disable(); 42 preempt_disable();
43 /* save vn_active, it's a universal truth! */ 43 /* save vn_active, it's a universal truth! */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index d22aa3103106..7e2416478503 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -544,9 +544,10 @@ xfs_qm_dqtobp(
544 /* 544 /*
545 * A simple sanity check in case we got a corrupted dquot... 545 * A simple sanity check in case we got a corrupted dquot...
546 */ 546 */
547 if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, 547 error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
548 flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), 548 flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
549 "dqtobp")) { 549 "dqtobp");
550 if (error) {
550 if (!(flags & XFS_QMOPT_DQREPAIR)) { 551 if (!(flags & XFS_QMOPT_DQREPAIR)) {
551 xfs_trans_brelse(tp, bp); 552 xfs_trans_brelse(tp, bp);
552 return XFS_ERROR(EIO); 553 return XFS_ERROR(EIO);
@@ -827,7 +828,7 @@ xfs_qm_dqget(
827 if (xfs_do_dqerror) { 828 if (xfs_do_dqerror) {
828 if ((xfs_dqerror_target == mp->m_ddev_targp) && 829 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
829 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { 830 (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
830 cmn_err(CE_DEBUG, "Returning error in dqget"); 831 xfs_debug(mp, "Returning error in dqget");
831 return (EIO); 832 return (EIO);
832 } 833 }
833 } 834 }
@@ -1207,8 +1208,9 @@ xfs_qm_dqflush(
1207 /* 1208 /*
1208 * A simple sanity check in case we got a corrupted dquot.. 1209 * A simple sanity check in case we got a corrupted dquot..
1209 */ 1210 */
1210 if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, 1211 error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
1211 XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { 1212 XFS_QMOPT_DOWARN, "dqflush (incore copy)");
1213 if (error) {
1212 xfs_buf_relse(bp); 1214 xfs_buf_relse(bp);
1213 xfs_dqfunlock(dqp); 1215 xfs_dqfunlock(dqp);
1214 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1216 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
@@ -1391,8 +1393,8 @@ xfs_qm_dqpurge(
1391 */ 1393 */
1392 error = xfs_qm_dqflush(dqp, SYNC_WAIT); 1394 error = xfs_qm_dqflush(dqp, SYNC_WAIT);
1393 if (error) 1395 if (error)
1394 xfs_fs_cmn_err(CE_WARN, mp, 1396 xfs_warn(mp, "%s: dquot %p flush failed",
1395 "xfs_qm_dqpurge: dquot %p flush failed", dqp); 1397 __func__, dqp);
1396 xfs_dqflock(dqp); 1398 xfs_dqflock(dqp);
1397 } 1399 }
1398 ASSERT(atomic_read(&dqp->q_pincount) == 0); 1400 ASSERT(atomic_read(&dqp->q_pincount) == 0);
@@ -1425,36 +1427,38 @@ xfs_qm_dqpurge(
1425void 1427void
1426xfs_qm_dqprint(xfs_dquot_t *dqp) 1428xfs_qm_dqprint(xfs_dquot_t *dqp)
1427{ 1429{
1428 cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------"); 1430 struct xfs_mount *mp = dqp->q_mount;
1429 cmn_err(CE_DEBUG, "---- dquotID = %d", 1431
1432 xfs_debug(mp, "-----------KERNEL DQUOT----------------");
1433 xfs_debug(mp, "---- dquotID = %d",
1430 (int)be32_to_cpu(dqp->q_core.d_id)); 1434 (int)be32_to_cpu(dqp->q_core.d_id));
1431 cmn_err(CE_DEBUG, "---- type = %s", DQFLAGTO_TYPESTR(dqp)); 1435 xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
1432 cmn_err(CE_DEBUG, "---- fs = 0x%p", dqp->q_mount); 1436 xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount);
1433 cmn_err(CE_DEBUG, "---- blkno = 0x%x", (int) dqp->q_blkno); 1437 xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno);
1434 cmn_err(CE_DEBUG, "---- boffset = 0x%x", (int) dqp->q_bufoffset); 1438 xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
1435 cmn_err(CE_DEBUG, "---- blkhlimit = %Lu (0x%x)", 1439 xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)",
1436 be64_to_cpu(dqp->q_core.d_blk_hardlimit), 1440 be64_to_cpu(dqp->q_core.d_blk_hardlimit),
1437 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit)); 1441 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
1438 cmn_err(CE_DEBUG, "---- blkslimit = %Lu (0x%x)", 1442 xfs_debug(mp, "---- blkslimit = %Lu (0x%x)",
1439 be64_to_cpu(dqp->q_core.d_blk_softlimit), 1443 be64_to_cpu(dqp->q_core.d_blk_softlimit),
1440 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit)); 1444 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
1441 cmn_err(CE_DEBUG, "---- inohlimit = %Lu (0x%x)", 1445 xfs_debug(mp, "---- inohlimit = %Lu (0x%x)",
1442 be64_to_cpu(dqp->q_core.d_ino_hardlimit), 1446 be64_to_cpu(dqp->q_core.d_ino_hardlimit),
1443 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit)); 1447 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
1444 cmn_err(CE_DEBUG, "---- inoslimit = %Lu (0x%x)", 1448 xfs_debug(mp, "---- inoslimit = %Lu (0x%x)",
1445 be64_to_cpu(dqp->q_core.d_ino_softlimit), 1449 be64_to_cpu(dqp->q_core.d_ino_softlimit),
1446 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit)); 1450 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
1447 cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", 1451 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
1448 be64_to_cpu(dqp->q_core.d_bcount), 1452 be64_to_cpu(dqp->q_core.d_bcount),
1449 (int)be64_to_cpu(dqp->q_core.d_bcount)); 1453 (int)be64_to_cpu(dqp->q_core.d_bcount));
1450 cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", 1454 xfs_debug(mp, "---- icount = %Lu (0x%x)",
1451 be64_to_cpu(dqp->q_core.d_icount), 1455 be64_to_cpu(dqp->q_core.d_icount),
1452 (int)be64_to_cpu(dqp->q_core.d_icount)); 1456 (int)be64_to_cpu(dqp->q_core.d_icount));
1453 cmn_err(CE_DEBUG, "---- btimer = %d", 1457 xfs_debug(mp, "---- btimer = %d",
1454 (int)be32_to_cpu(dqp->q_core.d_btimer)); 1458 (int)be32_to_cpu(dqp->q_core.d_btimer));
1455 cmn_err(CE_DEBUG, "---- itimer = %d", 1459 xfs_debug(mp, "---- itimer = %d",
1456 (int)be32_to_cpu(dqp->q_core.d_itimer)); 1460 (int)be32_to_cpu(dqp->q_core.d_itimer));
1457 cmn_err(CE_DEBUG, "---------------------------"); 1461 xfs_debug(mp, "---------------------------");
1458} 1462}
1459#endif 1463#endif
1460 1464
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 2a1f3dc10a02..9e0e2fa3f2c8 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -136,9 +136,8 @@ xfs_qm_dquot_logitem_push(
136 */ 136 */
137 error = xfs_qm_dqflush(dqp, 0); 137 error = xfs_qm_dqflush(dqp, 0);
138 if (error) 138 if (error)
139 xfs_fs_cmn_err(CE_WARN, dqp->q_mount, 139 xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
140 "xfs_qm_dquot_logitem_push: push error %d on dqp %p", 140 __func__, error, dqp);
141 error, dqp);
142 xfs_dqunlock(dqp); 141 xfs_dqunlock(dqp);
143} 142}
144 143
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 206a2815ced6..254ee062bd7d 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -80,7 +80,7 @@ xfs_qm_dquot_list_print(
80 int i = 0; 80 int i = 0;
81 81
82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) { 82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
83 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" " 83 xfs_debug(mp, " %d. \"%d (%s)\" "
84 "bcnt = %lld, icnt = %lld, refs = %d", 84 "bcnt = %lld, icnt = %lld, refs = %d",
85 i++, be32_to_cpu(dqp->q_core.d_id), 85 i++, be32_to_cpu(dqp->q_core.d_id),
86 DQFLAGTO_TYPESTR(dqp), 86 DQFLAGTO_TYPESTR(dqp),
@@ -205,7 +205,7 @@ xfs_qm_destroy(
205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { 205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
206 xfs_dqlock(dqp); 206 xfs_dqlock(dqp);
207#ifdef QUOTADEBUG 207#ifdef QUOTADEBUG
208 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp); 208 xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
209#endif 209#endif
210 list_del_init(&dqp->q_freelist); 210 list_del_init(&dqp->q_freelist);
211 xfs_Gqm->qm_dqfrlist_cnt--; 211 xfs_Gqm->qm_dqfrlist_cnt--;
@@ -341,9 +341,7 @@ xfs_qm_mount_quotas(
341 * quotas immediately. 341 * quotas immediately.
342 */ 342 */
343 if (mp->m_sb.sb_rextents) { 343 if (mp->m_sb.sb_rextents) {
344 cmn_err(CE_NOTE, 344 xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
345 "Cannot turn on quotas for realtime filesystem %s",
346 mp->m_fsname);
347 mp->m_qflags = 0; 345 mp->m_qflags = 0;
348 goto write_changes; 346 goto write_changes;
349 } 347 }
@@ -402,14 +400,13 @@ xfs_qm_mount_quotas(
402 * off, but the on disk superblock doesn't know that ! 400 * off, but the on disk superblock doesn't know that !
403 */ 401 */
404 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); 402 ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
405 xfs_fs_cmn_err(CE_ALERT, mp, 403 xfs_alert(mp, "%s: Superblock update failed!",
406 "XFS mount_quotas: Superblock update failed!"); 404 __func__);
407 } 405 }
408 } 406 }
409 407
410 if (error) { 408 if (error) {
411 xfs_fs_cmn_err(CE_WARN, mp, 409 xfs_warn(mp, "Failed to initialize disk quotas.");
412 "Failed to initialize disk quotas.");
413 return; 410 return;
414 } 411 }
415 412
@@ -1230,13 +1227,6 @@ xfs_qm_qino_alloc(
1230 } 1227 }
1231 1228
1232 /* 1229 /*
1233 * Keep an extra reference to this quota inode. This inode is
1234 * locked exclusively and joined to the transaction already.
1235 */
1236 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1237 IHOLD(*ip);
1238
1239 /*
1240 * Make the changes in the superblock, and log those too. 1230 * Make the changes in the superblock, and log those too.
1241 * sbfields arg may contain fields other than *QUOTINO; 1231 * sbfields arg may contain fields other than *QUOTINO;
1242 * VERSIONNUM for example. 1232 * VERSIONNUM for example.
@@ -1264,7 +1254,7 @@ xfs_qm_qino_alloc(
1264 xfs_mod_sb(tp, sbfields); 1254 xfs_mod_sb(tp, sbfields);
1265 1255
1266 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { 1256 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1267 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!"); 1257 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
1268 return error; 1258 return error;
1269 } 1259 }
1270 return 0; 1260 return 0;
@@ -1299,7 +1289,7 @@ xfs_qm_reset_dqcounts(
1299 * output any warnings because it's perfectly possible to 1289 * output any warnings because it's perfectly possible to
1300 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. 1290 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1301 */ 1291 */
1302 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR, 1292 (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1303 "xfs_quotacheck"); 1293 "xfs_quotacheck");
1304 ddq->d_bcount = 0; 1294 ddq->d_bcount = 0;
1305 ddq->d_icount = 0; 1295 ddq->d_icount = 0;
@@ -1676,7 +1666,7 @@ xfs_qm_quotacheck(
1676 */ 1666 */
1677 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); 1667 ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
1678 1668
1679 cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname); 1669 xfs_notice(mp, "Quotacheck needed: Please wait.");
1680 1670
1681 /* 1671 /*
1682 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset 1672 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
@@ -1754,9 +1744,9 @@ xfs_qm_quotacheck(
1754 1744
1755 error_return: 1745 error_return:
1756 if (error) { 1746 if (error) {
1757 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): " 1747 xfs_warn(mp,
1758 "Disabling quotas.", 1748 "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1759 mp->m_fsname, error); 1749 error);
1760 /* 1750 /*
1761 * We must turn off quotas. 1751 * We must turn off quotas.
1762 */ 1752 */
@@ -1764,12 +1754,11 @@ xfs_qm_quotacheck(
1764 ASSERT(xfs_Gqm != NULL); 1754 ASSERT(xfs_Gqm != NULL);
1765 xfs_qm_destroy_quotainfo(mp); 1755 xfs_qm_destroy_quotainfo(mp);
1766 if (xfs_mount_reset_sbqflags(mp)) { 1756 if (xfs_mount_reset_sbqflags(mp)) {
1767 cmn_err(CE_WARN, "XFS quotacheck %s: " 1757 xfs_warn(mp,
1768 "Failed to reset quota flags.", mp->m_fsname); 1758 "Quotacheck: Failed to reset quota flags.");
1769 } 1759 }
1770 } else { 1760 } else
1771 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname); 1761 xfs_notice(mp, "Quotacheck: Done.");
1772 }
1773 return (error); 1762 return (error);
1774} 1763}
1775 1764
@@ -1937,8 +1926,8 @@ again:
1937 */ 1926 */
1938 error = xfs_qm_dqflush(dqp, 0); 1927 error = xfs_qm_dqflush(dqp, 0);
1939 if (error) { 1928 if (error) {
1940 xfs_fs_cmn_err(CE_WARN, mp, 1929 xfs_warn(mp, "%s: dquot %p flush failed",
1941 "xfs_qm_dqreclaim: dquot %p flush failed", dqp); 1930 __func__, dqp);
1942 } 1931 }
1943 goto dqunlock; 1932 goto dqunlock;
1944 } 1933 }
@@ -2115,7 +2104,7 @@ xfs_qm_write_sb_changes(
2115 int error; 2104 int error;
2116 2105
2117#ifdef QUOTADEBUG 2106#ifdef QUOTADEBUG
2118 cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname); 2107 xfs_notice(mp, "Writing superblock quota changes");
2119#endif 2108#endif
2120 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 2109 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2121 if ((error = xfs_trans_reserve(tp, 0, 2110 if ((error = xfs_trans_reserve(tp, 0,
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 45b5cb1788ab..774d7ec6df8e 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -119,8 +119,7 @@ xfs_qm_newmount(
119 (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || 119 (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
120 (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && 120 (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) &&
121 xfs_dev_is_read_only(mp, "changing quota state")) { 121 xfs_dev_is_read_only(mp, "changing quota state")) {
122 cmn_err(CE_WARN, 122 xfs_warn(mp, "please mount with%s%s%s%s.",
123 "XFS: please mount with%s%s%s%s.",
124 (!quotaondisk ? "out quota" : ""), 123 (!quotaondisk ? "out quota" : ""),
125 (uquotaondisk ? " usrquota" : ""), 124 (uquotaondisk ? " usrquota" : ""),
126 (pquotaondisk ? " prjquota" : ""), 125 (pquotaondisk ? " prjquota" : ""),
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index bdebc183223e..c82f06778a27 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -41,12 +41,6 @@
41#include "xfs_qm.h" 41#include "xfs_qm.h"
42#include "xfs_trace.h" 42#include "xfs_trace.h"
43 43
44#ifdef DEBUG
45# define qdprintk(s, args...) cmn_err(CE_DEBUG, s, ## args)
46#else
47# define qdprintk(s, args...) do { } while (0)
48#endif
49
50STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 44STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
51STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, 45STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
52 uint); 46 uint);
@@ -294,7 +288,8 @@ xfs_qm_scall_trunc_qfiles(
294 int error = 0, error2 = 0; 288 int error = 0, error2 = 0;
295 289
296 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { 290 if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
297 qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags); 291 xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
292 __func__, flags, mp->m_qflags);
298 return XFS_ERROR(EINVAL); 293 return XFS_ERROR(EINVAL);
299 } 294 }
300 295
@@ -331,7 +326,8 @@ xfs_qm_scall_quotaon(
331 sbflags = 0; 326 sbflags = 0;
332 327
333 if (flags == 0) { 328 if (flags == 0) {
334 qdprintk("quotaon: zero flags, m_qflags=%x\n", mp->m_qflags); 329 xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
330 __func__, mp->m_qflags);
335 return XFS_ERROR(EINVAL); 331 return XFS_ERROR(EINVAL);
336 } 332 }
337 333
@@ -352,8 +348,9 @@ xfs_qm_scall_quotaon(
352 (flags & XFS_GQUOTA_ACCT) == 0 && 348 (flags & XFS_GQUOTA_ACCT) == 0 &&
353 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && 349 (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
354 (flags & XFS_OQUOTA_ENFD))) { 350 (flags & XFS_OQUOTA_ENFD))) {
355 qdprintk("Can't enforce without acct, flags=%x sbflags=%x\n", 351 xfs_debug(mp,
356 flags, mp->m_sb.sb_qflags); 352 "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
353 __func__, flags, mp->m_sb.sb_qflags);
357 return XFS_ERROR(EINVAL); 354 return XFS_ERROR(EINVAL);
358 } 355 }
359 /* 356 /*
@@ -541,7 +538,7 @@ xfs_qm_scall_setqlim(
541 q->qi_bsoftlimit = soft; 538 q->qi_bsoftlimit = soft;
542 } 539 }
543 } else { 540 } else {
544 qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); 541 xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
545 } 542 }
546 hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? 543 hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
547 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : 544 (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
@@ -557,7 +554,7 @@ xfs_qm_scall_setqlim(
557 q->qi_rtbsoftlimit = soft; 554 q->qi_rtbsoftlimit = soft;
558 } 555 }
559 } else { 556 } else {
560 qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); 557 xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
561 } 558 }
562 559
563 hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? 560 hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
@@ -574,7 +571,7 @@ xfs_qm_scall_setqlim(
574 q->qi_isoftlimit = soft; 571 q->qi_isoftlimit = soft;
575 } 572 }
576 } else { 573 } else {
577 qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); 574 xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
578 } 575 }
579 576
580 /* 577 /*
@@ -939,10 +936,11 @@ struct mutex qcheck_lock;
939#define DQTEST_LIST_PRINT(l, NXT, title) \ 936#define DQTEST_LIST_PRINT(l, NXT, title) \
940{ \ 937{ \
941 xfs_dqtest_t *dqp; int i = 0;\ 938 xfs_dqtest_t *dqp; int i = 0;\
942 cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \ 939 xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
943 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \ 940 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
944 dqp = (xfs_dqtest_t *)dqp->NXT) { \ 941 dqp = (xfs_dqtest_t *)dqp->NXT) { \
945 cmn_err(CE_DEBUG, " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \ 942 xfs_debug(dqp->q_mount, \
943 " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
946 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \ 944 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
947 dqp->d_bcount, dqp->d_icount); } \ 945 dqp->d_bcount, dqp->d_icount); } \
948} 946}
@@ -966,16 +964,17 @@ xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
966} 964}
967STATIC void 965STATIC void
968xfs_qm_dqtest_print( 966xfs_qm_dqtest_print(
969 xfs_dqtest_t *d) 967 struct xfs_mount *mp,
968 struct dqtest *d)
970{ 969{
971 cmn_err(CE_DEBUG, "-----------DQTEST DQUOT----------------"); 970 xfs_debug(mp, "-----------DQTEST DQUOT----------------");
972 cmn_err(CE_DEBUG, "---- dquot ID = %d", d->d_id); 971 xfs_debug(mp, "---- dquot ID = %d", d->d_id);
973 cmn_err(CE_DEBUG, "---- fs = 0x%p", d->q_mount); 972 xfs_debug(mp, "---- fs = 0x%p", d->q_mount);
974 cmn_err(CE_DEBUG, "---- bcount = %Lu (0x%x)", 973 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
975 d->d_bcount, (int)d->d_bcount); 974 d->d_bcount, (int)d->d_bcount);
976 cmn_err(CE_DEBUG, "---- icount = %Lu (0x%x)", 975 xfs_debug(mp, "---- icount = %Lu (0x%x)",
977 d->d_icount, (int)d->d_icount); 976 d->d_icount, (int)d->d_icount);
978 cmn_err(CE_DEBUG, "---------------------------"); 977 xfs_debug(mp, "---------------------------");
979} 978}
980 979
981STATIC void 980STATIC void
@@ -989,12 +988,14 @@ xfs_qm_dqtest_failed(
989{ 988{
990 qmtest_nfails++; 989 qmtest_nfails++;
991 if (error) 990 if (error)
992 cmn_err(CE_DEBUG, "quotacheck failed id=%d, err=%d\nreason: %s", 991 xfs_debug(dqp->q_mount,
993 d->d_id, error, reason); 992 "quotacheck failed id=%d, err=%d\nreason: %s",
993 d->d_id, error, reason);
994 else 994 else
995 cmn_err(CE_DEBUG, "quotacheck failed id=%d (%s) [%d != %d]", 995 xfs_debug(dqp->q_mount,
996 d->d_id, reason, (int)a, (int)b); 996 "quotacheck failed id=%d (%s) [%d != %d]",
997 xfs_qm_dqtest_print(d); 997 d->d_id, reason, (int)a, (int)b);
998 xfs_qm_dqtest_print(dqp->q_mount, d);
998 if (dqp) 999 if (dqp)
999 xfs_qm_dqprint(dqp); 1000 xfs_qm_dqprint(dqp);
1000} 1001}
@@ -1021,9 +1022,9 @@ xfs_dqtest_cmp2(
1021 be64_to_cpu(dqp->q_core.d_bcount) >= 1022 be64_to_cpu(dqp->q_core.d_bcount) >=
1022 be64_to_cpu(dqp->q_core.d_blk_softlimit)) { 1023 be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
1023 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) { 1024 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
1024 cmn_err(CE_DEBUG, 1025 xfs_debug(dqp->q_mount,
1025 "%d [%s] [0x%p] BLK TIMER NOT STARTED", 1026 "%d [%s] BLK TIMER NOT STARTED",
1026 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1027 d->d_id, DQFLAGTO_TYPESTR(d));
1027 err++; 1028 err++;
1028 } 1029 }
1029 } 1030 }
@@ -1031,16 +1032,16 @@ xfs_dqtest_cmp2(
1031 be64_to_cpu(dqp->q_core.d_icount) >= 1032 be64_to_cpu(dqp->q_core.d_icount) >=
1032 be64_to_cpu(dqp->q_core.d_ino_softlimit)) { 1033 be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
1033 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) { 1034 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
1034 cmn_err(CE_DEBUG, 1035 xfs_debug(dqp->q_mount,
1035 "%d [%s] [0x%p] INO TIMER NOT STARTED", 1036 "%d [%s] INO TIMER NOT STARTED",
1036 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1037 d->d_id, DQFLAGTO_TYPESTR(d));
1037 err++; 1038 err++;
1038 } 1039 }
1039 } 1040 }
1040#ifdef QUOTADEBUG 1041#ifdef QUOTADEBUG
1041 if (!err) { 1042 if (!err) {
1042 cmn_err(CE_DEBUG, "%d [%s] [0x%p] qchecked", 1043 xfs_debug(dqp->q_mount, "%d [%s] qchecked",
1043 d->d_id, DQFLAGTO_TYPESTR(d), d->q_mount); 1044 d->d_id, DQFLAGTO_TYPESTR(d));
1044 } 1045 }
1045#endif 1046#endif
1046 return (err); 1047 return (err);
@@ -1137,8 +1138,8 @@ xfs_qm_internalqcheck_adjust(
1137 1138
1138 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { 1139 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1139 *res = BULKSTAT_RV_NOTHING; 1140 *res = BULKSTAT_RV_NOTHING;
1140 qdprintk("internalqcheck: ino=%llu, uqino=%llu, gqino=%llu\n", 1141 xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
1141 (unsigned long long) ino, 1142 __func__, (unsigned long long) ino,
1142 (unsigned long long) mp->m_sb.sb_uquotino, 1143 (unsigned long long) mp->m_sb.sb_uquotino,
1143 (unsigned long long) mp->m_sb.sb_gquotino); 1144 (unsigned long long) mp->m_sb.sb_gquotino);
1144 return XFS_ERROR(EINVAL); 1145 return XFS_ERROR(EINVAL);
@@ -1223,12 +1224,12 @@ xfs_qm_internalqcheck(
1223 xfs_qm_internalqcheck_adjust, 1224 xfs_qm_internalqcheck_adjust,
1224 0, NULL, &done); 1225 0, NULL, &done);
1225 if (error) { 1226 if (error) {
1226 cmn_err(CE_DEBUG, "Bulkstat returned error 0x%x", error); 1227 xfs_debug(mp, "Bulkstat returned error 0x%x", error);
1227 break; 1228 break;
1228 } 1229 }
1229 } while (!done); 1230 } while (!done);
1230 1231
1231 cmn_err(CE_DEBUG, "Checking results against system dquots"); 1232 xfs_debug(mp, "Checking results against system dquots");
1232 for (i = 0; i < qmtest_hashmask; i++) { 1233 for (i = 0; i < qmtest_hashmask; i++) {
1233 xfs_dqtest_t *d, *n; 1234 xfs_dqtest_t *d, *n;
1234 xfs_dqhash_t *h; 1235 xfs_dqhash_t *h;
@@ -1246,10 +1247,10 @@ xfs_qm_internalqcheck(
1246 } 1247 }
1247 1248
1248 if (qmtest_nfails) { 1249 if (qmtest_nfails) {
1249 cmn_err(CE_DEBUG, "******** quotacheck failed ********"); 1250 xfs_debug(mp, "******** quotacheck failed ********");
1250 cmn_err(CE_DEBUG, "failures = %d", qmtest_nfails); 1251 xfs_debug(mp, "failures = %d", qmtest_nfails);
1251 } else { 1252 } else {
1252 cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); 1253 xfs_debug(mp, "******** quotacheck successful! ********");
1253 } 1254 }
1254 kmem_free(qmtest_udqtab); 1255 kmem_free(qmtest_udqtab);
1255 kmem_free(qmtest_gdqtab); 1256 kmem_free(qmtest_gdqtab);
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 7de91d1b75c0..2a3648731331 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -643,8 +643,9 @@ xfs_trans_dqresv(
643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && 643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { 644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
645#ifdef QUOTADEBUG 645#ifdef QUOTADEBUG
646 cmn_err(CE_DEBUG, "BLK Res: nblks=%ld + resbcount=%Ld" 646 xfs_debug(mp,
647 " > hardlimit=%Ld?", nblks, *resbcountp, hardlimit); 647 "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
648 nblks, *resbcountp, hardlimit);
648#endif 649#endif
649 if (nblks > 0) { 650 if (nblks > 0) {
650 /* 651 /*
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
deleted file mode 100644
index 0df88897ef84..000000000000
--- a/fs/xfs/support/debug.c
+++ /dev/null
@@ -1,107 +0,0 @@
1/*
2 * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include <xfs.h>
19#include "debug.h"
20
21/* xfs_mount.h drags a lot of crap in, sorry.. */
22#include "xfs_sb.h"
23#include "xfs_inum.h"
24#include "xfs_ag.h"
25#include "xfs_mount.h"
26#include "xfs_error.h"
27
28void
29cmn_err(
30 const char *lvl,
31 const char *fmt,
32 ...)
33{
34 struct va_format vaf;
35 va_list args;
36
37 va_start(args, fmt);
38 vaf.fmt = fmt;
39 vaf.va = &args;
40
41 printk("%s%pV", lvl, &vaf);
42 va_end(args);
43
44 BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
45}
46
47void
48xfs_fs_cmn_err(
49 const char *lvl,
50 struct xfs_mount *mp,
51 const char *fmt,
52 ...)
53{
54 struct va_format vaf;
55 va_list args;
56
57 va_start(args, fmt);
58 vaf.fmt = fmt;
59 vaf.va = &args;
60
61 printk("%sFilesystem %s: %pV", lvl, mp->m_fsname, &vaf);
62 va_end(args);
63
64 BUG_ON(strncmp(lvl, KERN_EMERG, strlen(KERN_EMERG)) == 0);
65}
66
67/* All callers to xfs_cmn_err use CE_ALERT, so don't bother testing lvl */
68void
69xfs_cmn_err(
70 int panic_tag,
71 const char *lvl,
72 struct xfs_mount *mp,
73 const char *fmt,
74 ...)
75{
76 struct va_format vaf;
77 va_list args;
78 int do_panic = 0;
79
80 if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
81 printk(KERN_ALERT "XFS: Transforming an alert into a BUG.");
82 do_panic = 1;
83 }
84
85 va_start(args, fmt);
86 vaf.fmt = fmt;
87 vaf.va = &args;
88
89 printk(KERN_ALERT "Filesystem %s: %pV", mp->m_fsname, &vaf);
90 va_end(args);
91
92 BUG_ON(do_panic);
93}
94
95void
96assfail(char *expr, char *file, int line)
97{
98 printk(KERN_CRIT "Assertion failed: %s, file: %s, line: %d\n", expr,
99 file, line);
100 BUG();
101}
102
103void
104xfs_hex_dump(void *p, int length)
105{
106 print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
107}
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
deleted file mode 100644
index 05699f67d475..000000000000
--- a/fs/xfs/support/debug.h
+++ /dev/null
@@ -1,61 +0,0 @@
1/*
2 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_SUPPORT_DEBUG_H__
19#define __XFS_SUPPORT_DEBUG_H__
20
21#include <stdarg.h>
22
23struct xfs_mount;
24
25#define CE_DEBUG KERN_DEBUG
26#define CE_CONT KERN_INFO
27#define CE_NOTE KERN_NOTICE
28#define CE_WARN KERN_WARNING
29#define CE_ALERT KERN_ALERT
30#define CE_PANIC KERN_EMERG
31
32void cmn_err(const char *lvl, const char *fmt, ...)
33 __attribute__ ((format (printf, 2, 3)));
34void xfs_fs_cmn_err( const char *lvl, struct xfs_mount *mp,
35 const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
36void xfs_cmn_err( int panic_tag, const char *lvl, struct xfs_mount *mp,
37 const char *fmt, ...) __attribute__ ((format (printf, 4, 5)));
38
39extern void assfail(char *expr, char *f, int l);
40
41#define ASSERT_ALWAYS(expr) \
42 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
43
44#ifndef DEBUG
45#define ASSERT(expr) ((void)0)
46
47#ifndef STATIC
48# define STATIC static noinline
49#endif
50
51#else /* DEBUG */
52
53#define ASSERT(expr) \
54 (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
55
56#ifndef STATIC
57# define STATIC noinline
58#endif
59
60#endif /* DEBUG */
61#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index f3227984a9bf..4bc3c649aee4 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -147,10 +147,9 @@ xfs_alloc_get_rec(
147 */ 147 */
148STATIC void 148STATIC void
149xfs_alloc_compute_aligned( 149xfs_alloc_compute_aligned(
150 xfs_alloc_arg_t *args, /* allocation argument structure */
150 xfs_agblock_t foundbno, /* starting block in found extent */ 151 xfs_agblock_t foundbno, /* starting block in found extent */
151 xfs_extlen_t foundlen, /* length in found extent */ 152 xfs_extlen_t foundlen, /* length in found extent */
152 xfs_extlen_t alignment, /* alignment for allocation */
153 xfs_extlen_t minlen, /* minimum length for allocation */
154 xfs_agblock_t *resbno, /* result block number */ 153 xfs_agblock_t *resbno, /* result block number */
155 xfs_extlen_t *reslen) /* result length */ 154 xfs_extlen_t *reslen) /* result length */
156{ 155{
@@ -158,8 +157,8 @@ xfs_alloc_compute_aligned(
158 xfs_extlen_t diff; 157 xfs_extlen_t diff;
159 xfs_extlen_t len; 158 xfs_extlen_t len;
160 159
161 if (alignment > 1 && foundlen >= minlen) { 160 if (args->alignment > 1 && foundlen >= args->minlen) {
162 bno = roundup(foundbno, alignment); 161 bno = roundup(foundbno, args->alignment);
163 diff = bno - foundbno; 162 diff = bno - foundbno;
164 len = diff >= foundlen ? 0 : foundlen - diff; 163 len = diff >= foundlen ? 0 : foundlen - diff;
165 } else { 164 } else {
@@ -464,6 +463,27 @@ xfs_alloc_read_agfl(
464 return 0; 463 return 0;
465} 464}
466 465
466STATIC int
467xfs_alloc_update_counters(
468 struct xfs_trans *tp,
469 struct xfs_perag *pag,
470 struct xfs_buf *agbp,
471 long len)
472{
473 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp);
474
475 pag->pagf_freeblks += len;
476 be32_add_cpu(&agf->agf_freeblks, len);
477
478 xfs_trans_agblocks_delta(tp, len);
479 if (unlikely(be32_to_cpu(agf->agf_freeblks) >
480 be32_to_cpu(agf->agf_length)))
481 return EFSCORRUPTED;
482
483 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
484 return 0;
485}
486
467/* 487/*
468 * Allocation group level functions. 488 * Allocation group level functions.
469 */ 489 */
@@ -505,49 +525,44 @@ xfs_alloc_ag_vextent(
505 ASSERT(0); 525 ASSERT(0);
506 /* NOTREACHED */ 526 /* NOTREACHED */
507 } 527 }
508 if (error) 528
529 if (error || args->agbno == NULLAGBLOCK)
509 return error; 530 return error;
510 /*
511 * If the allocation worked, need to change the agf structure
512 * (and log it), and the superblock.
513 */
514 if (args->agbno != NULLAGBLOCK) {
515 xfs_agf_t *agf; /* allocation group freelist header */
516 long slen = (long)args->len;
517 531
518 ASSERT(args->len >= args->minlen && args->len <= args->maxlen); 532 ASSERT(args->len >= args->minlen);
519 ASSERT(!(args->wasfromfl) || !args->isfl); 533 ASSERT(args->len <= args->maxlen);
520 ASSERT(args->agbno % args->alignment == 0); 534 ASSERT(!args->wasfromfl || !args->isfl);
521 if (!(args->wasfromfl)) { 535 ASSERT(args->agbno % args->alignment == 0);
522 536
523 agf = XFS_BUF_TO_AGF(args->agbp); 537 if (!args->wasfromfl) {
524 be32_add_cpu(&agf->agf_freeblks, -(args->len)); 538 error = xfs_alloc_update_counters(args->tp, args->pag,
525 xfs_trans_agblocks_delta(args->tp, 539 args->agbp,
526 -((long)(args->len))); 540 -((long)(args->len)));
527 args->pag->pagf_freeblks -= args->len; 541 if (error)
528 ASSERT(be32_to_cpu(agf->agf_freeblks) <= 542 return error;
529 be32_to_cpu(agf->agf_length)); 543
530 xfs_alloc_log_agf(args->tp, args->agbp, 544 /*
531 XFS_AGF_FREEBLKS); 545 * Search the busylist for these blocks and mark the
532 /* 546 * transaction as synchronous if blocks are found. This
533 * Search the busylist for these blocks and mark the 547 * avoids the need to block due to a synchronous log
534 * transaction as synchronous if blocks are found. This 548 * force to ensure correct ordering as the synchronous
535 * avoids the need to block due to a synchronous log 549 * transaction will guarantee that for us.
536 * force to ensure correct ordering as the synchronous 550 */
537 * transaction will guarantee that for us. 551 if (xfs_alloc_busy_search(args->mp, args->agno,
538 */ 552 args->agbno, args->len))
539 if (xfs_alloc_busy_search(args->mp, args->agno, 553 xfs_trans_set_sync(args->tp);
540 args->agbno, args->len))
541 xfs_trans_set_sync(args->tp);
542 }
543 if (!args->isfl)
544 xfs_trans_mod_sb(args->tp,
545 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
546 XFS_TRANS_SB_FDBLOCKS, -slen);
547 XFS_STATS_INC(xs_allocx);
548 XFS_STATS_ADD(xs_allocb, args->len);
549 } 554 }
550 return 0; 555
556 if (!args->isfl) {
557 xfs_trans_mod_sb(args->tp, args->wasdel ?
558 XFS_TRANS_SB_RES_FDBLOCKS :
559 XFS_TRANS_SB_FDBLOCKS,
560 -((long)(args->len)));
561 }
562
563 XFS_STATS_INC(xs_allocx);
564 XFS_STATS_ADD(xs_allocb, args->len);
565 return error;
551} 566}
552 567
553/* 568/*
@@ -693,8 +708,7 @@ xfs_alloc_find_best_extent(
693 if (error) 708 if (error)
694 goto error0; 709 goto error0;
695 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 710 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
696 xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, 711 xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
697 args->minlen, &bno, slena);
698 712
699 /* 713 /*
700 * The good extent is closer than this one. 714 * The good extent is closer than this one.
@@ -866,8 +880,8 @@ xfs_alloc_ag_vextent_near(
866 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 880 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
867 goto error0; 881 goto error0;
868 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 882 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
869 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 883 xfs_alloc_compute_aligned(args, ltbno, ltlen,
870 args->minlen, &ltbnoa, &ltlena); 884 &ltbnoa, &ltlena);
871 if (ltlena < args->minlen) 885 if (ltlena < args->minlen)
872 continue; 886 continue;
873 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 887 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
@@ -987,8 +1001,8 @@ xfs_alloc_ag_vextent_near(
987 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) 1001 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
988 goto error0; 1002 goto error0;
989 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1003 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
990 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, 1004 xfs_alloc_compute_aligned(args, ltbno, ltlen,
991 args->minlen, &ltbnoa, &ltlena); 1005 &ltbnoa, &ltlena);
992 if (ltlena >= args->minlen) 1006 if (ltlena >= args->minlen)
993 break; 1007 break;
994 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1008 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1003,8 +1017,8 @@ xfs_alloc_ag_vextent_near(
1003 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) 1017 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1004 goto error0; 1018 goto error0;
1005 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1019 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1006 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, 1020 xfs_alloc_compute_aligned(args, gtbno, gtlen,
1007 args->minlen, &gtbnoa, &gtlena); 1021 &gtbnoa, &gtlena);
1008 if (gtlena >= args->minlen) 1022 if (gtlena >= args->minlen)
1009 break; 1023 break;
1010 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1024 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1183,8 +1197,7 @@ xfs_alloc_ag_vextent_size(
1183 * once aligned; if not, we search left for something better. 1197 * once aligned; if not, we search left for something better.
1184 * This can't happen in the second case above. 1198 * This can't happen in the second case above.
1185 */ 1199 */
1186 xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, 1200 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
1187 &rbno, &rlen);
1188 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1201 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1189 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1202 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1190 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1203 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1209,8 +1222,8 @@ xfs_alloc_ag_vextent_size(
1209 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1222 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1210 if (flen < bestrlen) 1223 if (flen < bestrlen)
1211 break; 1224 break;
1212 xfs_alloc_compute_aligned(fbno, flen, args->alignment, 1225 xfs_alloc_compute_aligned(args, fbno, flen,
1213 args->minlen, &rbno, &rlen); 1226 &rbno, &rlen);
1214 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1227 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1215 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1228 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1216 (rlen <= flen && rbno + rlen <= fbno + flen), 1229 (rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1388,6 +1401,7 @@ xfs_free_ag_extent(
1388 xfs_mount_t *mp; /* mount point struct for filesystem */ 1401 xfs_mount_t *mp; /* mount point struct for filesystem */
1389 xfs_agblock_t nbno; /* new starting block of freespace */ 1402 xfs_agblock_t nbno; /* new starting block of freespace */
1390 xfs_extlen_t nlen; /* new length of freespace */ 1403 xfs_extlen_t nlen; /* new length of freespace */
1404 xfs_perag_t *pag; /* per allocation group data */
1391 1405
1392 mp = tp->t_mountp; 1406 mp = tp->t_mountp;
1393 /* 1407 /*
@@ -1586,30 +1600,20 @@ xfs_free_ag_extent(
1586 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1600 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1587 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1601 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1588 cnt_cur = NULL; 1602 cnt_cur = NULL;
1603
1589 /* 1604 /*
1590 * Update the freespace totals in the ag and superblock. 1605 * Update the freespace totals in the ag and superblock.
1591 */ 1606 */
1592 { 1607 pag = xfs_perag_get(mp, agno);
1593 xfs_agf_t *agf; 1608 error = xfs_alloc_update_counters(tp, pag, agbp, len);
1594 xfs_perag_t *pag; /* per allocation group data */ 1609 xfs_perag_put(pag);
1595 1610 if (error)
1596 pag = xfs_perag_get(mp, agno); 1611 goto error0;
1597 pag->pagf_freeblks += len; 1612
1598 xfs_perag_put(pag); 1613 if (!isfl)
1599 1614 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1600 agf = XFS_BUF_TO_AGF(agbp); 1615 XFS_STATS_INC(xs_freex);
1601 be32_add_cpu(&agf->agf_freeblks, len); 1616 XFS_STATS_ADD(xs_freeb, len);
1602 xfs_trans_agblocks_delta(tp, len);
1603 XFS_WANT_CORRUPTED_GOTO(
1604 be32_to_cpu(agf->agf_freeblks) <=
1605 be32_to_cpu(agf->agf_length),
1606 error0);
1607 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1608 if (!isfl)
1609 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1610 XFS_STATS_INC(xs_freex);
1611 XFS_STATS_ADD(xs_freeb, len);
1612 }
1613 1617
1614 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1618 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1615 1619
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index dc3afd7739ff..fa00788de2f5 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2365,6 +2365,13 @@ xfs_bmap_rtalloc(
2365 */ 2365 */
2366 if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN) 2366 if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
2367 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; 2367 ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
2368
2369 /*
2370 * Lock out other modifications to the RT bitmap inode.
2371 */
2372 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2373 xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2374
2368 /* 2375 /*
2369 * If it's an allocation to an empty file at offset 0, 2376 * If it's an allocation to an empty file at offset 0,
2370 * pick an extent that will space things out in the rt area. 2377 * pick an extent that will space things out in the rt area.
@@ -3519,7 +3526,7 @@ xfs_bmap_search_extents(
3519 3526
3520 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) && 3527 if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
3521 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) { 3528 !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
3522 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, 3529 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
3523 "Access to block zero in inode %llu " 3530 "Access to block zero in inode %llu "
3524 "start_block: %llx start_off: %llx " 3531 "start_block: %llx start_off: %llx "
3525 "blkcnt: %llx extent-state: %x lastx: %x\n", 3532 "blkcnt: %llx extent-state: %x lastx: %x\n",
@@ -4193,12 +4200,11 @@ xfs_bmap_read_extents(
4193 num_recs = xfs_btree_get_numrecs(block); 4200 num_recs = xfs_btree_get_numrecs(block);
4194 if (unlikely(i + num_recs > room)) { 4201 if (unlikely(i + num_recs > room)) {
4195 ASSERT(i + num_recs <= room); 4202 ASSERT(i + num_recs <= room);
4196 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 4203 xfs_warn(ip->i_mount,
4197 "corrupt dinode %Lu, (btree extents).", 4204 "corrupt dinode %Lu, (btree extents).",
4198 (unsigned long long) ip->i_ino); 4205 (unsigned long long) ip->i_ino);
4199 XFS_ERROR_REPORT("xfs_bmap_read_extents(1)", 4206 XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
4200 XFS_ERRLEVEL_LOW, 4207 XFS_ERRLEVEL_LOW, ip->i_mount, block);
4201 ip->i_mount);
4202 goto error0; 4208 goto error0;
4203 } 4209 }
4204 XFS_WANT_CORRUPTED_GOTO( 4210 XFS_WANT_CORRUPTED_GOTO(
@@ -5772,7 +5778,7 @@ xfs_check_block(
5772 else 5778 else
5773 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr); 5779 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
5774 if (*thispa == *pp) { 5780 if (*thispa == *pp) {
5775 cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld", 5781 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
5776 __func__, j, i, 5782 __func__, j, i,
5777 (unsigned long long)be64_to_cpu(*thispa)); 5783 (unsigned long long)be64_to_cpu(*thispa));
5778 panic("%s: ptrs are equal in node\n", 5784 panic("%s: ptrs are equal in node\n",
@@ -5937,11 +5943,11 @@ xfs_bmap_check_leaf_extents(
5937 return; 5943 return;
5938 5944
5939error0: 5945error0:
5940 cmn_err(CE_WARN, "%s: at error0", __func__); 5946 xfs_warn(mp, "%s: at error0", __func__);
5941 if (bp_release) 5947 if (bp_release)
5942 xfs_trans_brelse(NULL, bp); 5948 xfs_trans_brelse(NULL, bp);
5943error_norelse: 5949error_norelse:
5944 cmn_err(CE_WARN, "%s: BAD after btree leaves for %d extents", 5950 xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
5945 __func__, i); 5951 __func__, i);
5946 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__); 5952 panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
5947 return; 5953 return;
@@ -6144,7 +6150,7 @@ xfs_bmap_punch_delalloc_range(
6144 if (error) { 6150 if (error) {
6145 /* something screwed, just bail */ 6151 /* something screwed, just bail */
6146 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 6152 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
6147 xfs_fs_cmn_err(CE_ALERT, ip->i_mount, 6153 xfs_alert(ip->i_mount,
6148 "Failed delalloc mapping lookup ino %lld fsb %lld.", 6154 "Failed delalloc mapping lookup ino %lld fsb %lld.",
6149 ip->i_ino, start_fsb); 6155 ip->i_ino, start_fsb);
6150 } 6156 }
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 6f8c21ce0d6d..e5413d96f1af 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -130,10 +130,12 @@ xfs_buf_item_log_check(
130 orig = bip->bli_orig; 130 orig = bip->bli_orig;
131 buffer = XFS_BUF_PTR(bp); 131 buffer = XFS_BUF_PTR(bp);
132 for (x = 0; x < XFS_BUF_COUNT(bp); x++) { 132 for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
133 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) 133 if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
134 cmn_err(CE_PANIC, 134 xfs_emerg(bp->b_mount,
135 "xfs_buf_item_log_check bip %x buffer %x orig %x index %d", 135 "%s: bip %x buffer %x orig %x index %d",
136 bip, bp, orig, x); 136 __func__, bip, bp, orig, x);
137 ASSERT(0);
138 }
137 } 139 }
138} 140}
139#else 141#else
@@ -983,10 +985,9 @@ xfs_buf_iodone_callbacks(
983 if (XFS_BUF_TARGET(bp) != lasttarg || 985 if (XFS_BUF_TARGET(bp) != lasttarg ||
984 time_after(jiffies, (lasttime + 5*HZ))) { 986 time_after(jiffies, (lasttime + 5*HZ))) {
985 lasttime = jiffies; 987 lasttime = jiffies;
986 cmn_err(CE_ALERT, "Device %s, XFS metadata write error" 988 xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
987 " block 0x%llx in %s",
988 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), 989 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
989 (__uint64_t)XFS_BUF_ADDR(bp), mp->m_fsname); 990 (__uint64_t)XFS_BUF_ADDR(bp));
990 } 991 }
991 lasttarg = XFS_BUF_TARGET(bp); 992 lasttarg = XFS_BUF_TARGET(bp);
992 993
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 1c00bedb3175..6102ac6d1dff 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1995,13 +1995,12 @@ xfs_da_do_buf(
1995 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); 1995 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
1996 if (unlikely(error == EFSCORRUPTED)) { 1996 if (unlikely(error == EFSCORRUPTED)) {
1997 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 1997 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
1998 cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", 1998 xfs_alert(mp, "%s: bno %lld dir: inode %lld",
1999 (long long)bno); 1999 __func__, (long long)bno,
2000 cmn_err(CE_ALERT, "dir: inode %lld\n",
2001 (long long)dp->i_ino); 2000 (long long)dp->i_ino);
2002 for (i = 0; i < nmap; i++) { 2001 for (i = 0; i < nmap; i++) {
2003 cmn_err(CE_ALERT, 2002 xfs_alert(mp,
2004 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n", 2003"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
2005 i, 2004 i,
2006 (long long)mapp[i].br_startoff, 2005 (long long)mapp[i].br_startoff,
2007 (long long)mapp[i].br_startblock, 2006 (long long)mapp[i].br_startblock,
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index e60490bc00a6..be628677c288 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -270,9 +270,9 @@ xfs_swap_extents(
270 /* check inode formats now that data is flushed */ 270 /* check inode formats now that data is flushed */
271 error = xfs_swap_extents_check_format(ip, tip); 271 error = xfs_swap_extents_check_format(ip, tip);
272 if (error) { 272 if (error) {
273 xfs_fs_cmn_err(CE_NOTE, mp, 273 xfs_notice(mp,
274 "%s: inode 0x%llx format is incompatible for exchanging.", 274 "%s: inode 0x%llx format is incompatible for exchanging.",
275 __FILE__, ip->i_ino); 275 __func__, ip->i_ino);
276 goto out_unlock; 276 goto out_unlock;
277 } 277 }
278 278
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index a1321bc7f192..dba7a71cedf3 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -159,7 +159,7 @@ xfs_dir_ino_validate(
159 XFS_AGINO_TO_INO(mp, agno, agino) == ino; 159 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
160 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, 160 if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
161 XFS_RANDOM_DIR_INO_VALIDATE))) { 161 XFS_RANDOM_DIR_INO_VALIDATE))) {
162 xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", 162 xfs_warn(mp, "Invalid inode number 0x%Lx",
163 (unsigned long long) ino); 163 (unsigned long long) ino);
164 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); 164 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
165 return XFS_ERROR(EFSCORRUPTED); 165 return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index f9a0864b696a..a0aab7d3294f 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -899,10 +899,9 @@ xfs_dir2_leafn_rebalance(
899 if(blk2->index < 0) { 899 if(blk2->index < 0) {
900 state->inleaf = 1; 900 state->inleaf = 1;
901 blk2->index = 0; 901 blk2->index = 0;
902 cmn_err(CE_ALERT, 902 xfs_alert(args->dp->i_mount,
903 "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: " 903 "%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n",
904 "blk1->index %d\n", 904 __func__, blk1->index);
905 blk1->index);
906 } 905 }
907} 906}
908 907
@@ -1641,26 +1640,22 @@ xfs_dir2_node_addname_int(
1641 } 1640 }
1642 1641
1643 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { 1642 if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
1644 cmn_err(CE_ALERT, 1643 xfs_alert(mp,
1645 "xfs_dir2_node_addname_int: dir ino " 1644 "%s: dir ino " "%llu needed freesp block %lld for\n"
1646 "%llu needed freesp block %lld for\n" 1645 " data block %lld, got %lld ifbno %llu lastfbno %d",
1647 " data block %lld, got %lld\n" 1646 __func__, (unsigned long long)dp->i_ino,
1648 " ifbno %llu lastfbno %d\n",
1649 (unsigned long long)dp->i_ino,
1650 (long long)xfs_dir2_db_to_fdb(mp, dbno), 1647 (long long)xfs_dir2_db_to_fdb(mp, dbno),
1651 (long long)dbno, (long long)fbno, 1648 (long long)dbno, (long long)fbno,
1652 (unsigned long long)ifbno, lastfbno); 1649 (unsigned long long)ifbno, lastfbno);
1653 if (fblk) { 1650 if (fblk) {
1654 cmn_err(CE_ALERT, 1651 xfs_alert(mp,
1655 " fblk 0x%p blkno %llu " 1652 " fblk 0x%p blkno %llu index %d magic 0x%x",
1656 "index %d magic 0x%x\n",
1657 fblk, 1653 fblk,
1658 (unsigned long long)fblk->blkno, 1654 (unsigned long long)fblk->blkno,
1659 fblk->index, 1655 fblk->index,
1660 fblk->magic); 1656 fblk->magic);
1661 } else { 1657 } else {
1662 cmn_err(CE_ALERT, 1658 xfs_alert(mp, " ... fblk is NULL");
1663 " ... fblk is NULL\n");
1664 } 1659 }
1665 XFS_ERROR_REPORT("xfs_dir2_node_addname_int", 1660 XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
1666 XFS_ERRLEVEL_LOW, mp); 1661 XFS_ERRLEVEL_LOW, mp);
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 4c7db74a05f7..39f06336b99d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -48,7 +48,7 @@ xfs_error_trap(int e)
48 break; 48 break;
49 if (e != xfs_etrap[i]) 49 if (e != xfs_etrap[i])
50 continue; 50 continue;
51 cmn_err(CE_NOTE, "xfs_error_trap: error %d", e); 51 xfs_notice(NULL, "%s: error %d", __func__, e);
52 BUG(); 52 BUG();
53 break; 53 break;
54 } 54 }
@@ -74,7 +74,7 @@ xfs_error_test(int error_tag, int *fsidp, char *expression,
74 74
75 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { 75 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
76 if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) { 76 if (xfs_etest[i] == error_tag && xfs_etest_fsid[i] == fsid) {
77 cmn_err(CE_WARN, 77 xfs_warn(NULL,
78 "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"", 78 "Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
79 expression, file, line, xfs_etest_fsname[i]); 79 expression, file, line, xfs_etest_fsname[i]);
80 return 1; 80 return 1;
@@ -95,14 +95,14 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
95 95
96 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { 96 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
97 if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) { 97 if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
98 cmn_err(CE_WARN, "XFS error tag #%d on", error_tag); 98 xfs_warn(mp, "error tag #%d on", error_tag);
99 return 0; 99 return 0;
100 } 100 }
101 } 101 }
102 102
103 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) { 103 for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
104 if (xfs_etest[i] == 0) { 104 if (xfs_etest[i] == 0) {
105 cmn_err(CE_WARN, "Turned on XFS error tag #%d", 105 xfs_warn(mp, "Turned on XFS error tag #%d",
106 error_tag); 106 error_tag);
107 xfs_etest[i] = error_tag; 107 xfs_etest[i] = error_tag;
108 xfs_etest_fsid[i] = fsid; 108 xfs_etest_fsid[i] = fsid;
@@ -114,7 +114,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
114 } 114 }
115 } 115 }
116 116
117 cmn_err(CE_WARN, "error tag overflow, too many turned on"); 117 xfs_warn(mp, "error tag overflow, too many turned on");
118 118
119 return 1; 119 return 1;
120} 120}
@@ -133,7 +133,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
133 if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) && 133 if ((fsid == 0LL || xfs_etest_fsid[i] == fsid) &&
134 xfs_etest[i] != 0) { 134 xfs_etest[i] != 0) {
135 cleared = 1; 135 cleared = 1;
136 cmn_err(CE_WARN, "Clearing XFS error tag #%d", 136 xfs_warn(mp, "Clearing XFS error tag #%d",
137 xfs_etest[i]); 137 xfs_etest[i]);
138 xfs_etest[i] = 0; 138 xfs_etest[i] = 0;
139 xfs_etest_fsid[i] = 0LL; 139 xfs_etest_fsid[i] = 0LL;
@@ -144,9 +144,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
144 } 144 }
145 145
146 if (loud || cleared) 146 if (loud || cleared)
147 cmn_err(CE_WARN, 147 xfs_warn(mp, "Cleared all XFS error tags for filesystem");
148 "Cleared all XFS error tags for filesystem \"%s\"",
149 mp->m_fsname);
150 148
151 return 0; 149 return 0;
152} 150}
@@ -162,9 +160,8 @@ xfs_error_report(
162 inst_t *ra) 160 inst_t *ra)
163{ 161{
164 if (level <= xfs_error_level) { 162 if (level <= xfs_error_level) {
165 xfs_cmn_err(XFS_PTAG_ERROR_REPORT, 163 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
166 CE_ALERT, mp, 164 "Internal error %s at line %d of file %s. Caller 0x%p\n",
167 "XFS internal error %s at line %d of file %s. Caller 0x%p\n",
168 tag, linenum, filename, ra); 165 tag, linenum, filename, ra);
169 166
170 xfs_stack_trace(); 167 xfs_stack_trace();
@@ -184,4 +181,5 @@ xfs_corruption_error(
184 if (level <= xfs_error_level) 181 if (level <= xfs_error_level)
185 xfs_hex_dump(p, 16); 182 xfs_hex_dump(p, 16);
186 xfs_error_report(tag, level, mp, filename, linenum, ra); 183 xfs_error_report(tag, level, mp, filename, linenum, ra);
184 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
187} 185}
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 10dce5475f02..079a367f44ee 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -145,10 +145,8 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
145#endif /* DEBUG */ 145#endif /* DEBUG */
146 146
147/* 147/*
148 * XFS panic tags -- allow a call to xfs_cmn_err() be turned into 148 * XFS panic tags -- allow a call to xfs_alert_tag() be turned into
149 * a panic by setting xfs_panic_mask in a 149 * a panic by setting xfs_panic_mask in a sysctl.
150 * sysctl. update xfs_max[XFS_PARAM] if
151 * more are added.
152 */ 150 */
153#define XFS_NO_PTAG 0 151#define XFS_NO_PTAG 0
154#define XFS_PTAG_IFLUSH 0x00000001 152#define XFS_PTAG_IFLUSH 0x00000001
@@ -160,17 +158,4 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp, int loud);
160#define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040 158#define XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040
161#define XFS_PTAG_FSBLOCK_ZERO 0x00000080 159#define XFS_PTAG_FSBLOCK_ZERO 0x00000080
162 160
163struct xfs_mount;
164
165extern void xfs_hex_dump(void *p, int length);
166
167#define xfs_fs_repair_cmn_err(level, mp, fmt, args...) \
168 xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
169
170#define xfs_fs_mount_cmn_err(f, fmt, args...) \
171 do { \
172 if (!(f & XFS_MFSI_QUIET)) \
173 cmn_err(CE_WARN, "XFS: " fmt, ## args); \
174 } while (0)
175
176#endif /* __XFS_ERROR_H__ */ 161#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 85668efb3e3e..9153d2c77caf 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -385,8 +385,8 @@ xfs_growfs_data_private(
385 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)), 385 XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
386 XFS_FSS_TO_BB(mp, 1), 0, &bp); 386 XFS_FSS_TO_BB(mp, 1), 0, &bp);
387 if (error) { 387 if (error) {
388 xfs_fs_cmn_err(CE_WARN, mp, 388 xfs_warn(mp,
389 "error %d reading secondary superblock for ag %d", 389 "error %d reading secondary superblock for ag %d",
390 error, agno); 390 error, agno);
391 break; 391 break;
392 } 392 }
@@ -399,7 +399,7 @@ xfs_growfs_data_private(
399 if (!(error = xfs_bwrite(mp, bp))) { 399 if (!(error = xfs_bwrite(mp, bp))) {
400 continue; 400 continue;
401 } else { 401 } else {
402 xfs_fs_cmn_err(CE_WARN, mp, 402 xfs_warn(mp,
403 "write error %d updating secondary superblock for ag %d", 403 "write error %d updating secondary superblock for ag %d",
404 error, agno); 404 error, agno);
405 break; /* no point in continuing */ 405 break; /* no point in continuing */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 0626a32c3447..84ebeec16642 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1055,28 +1055,23 @@ xfs_difree(
1055 */ 1055 */
1056 agno = XFS_INO_TO_AGNO(mp, inode); 1056 agno = XFS_INO_TO_AGNO(mp, inode);
1057 if (agno >= mp->m_sb.sb_agcount) { 1057 if (agno >= mp->m_sb.sb_agcount) {
1058 cmn_err(CE_WARN, 1058 xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).",
1059 "xfs_difree: agno >= mp->m_sb.sb_agcount (%d >= %d) on %s. Returning EINVAL.", 1059 __func__, agno, mp->m_sb.sb_agcount);
1060 agno, mp->m_sb.sb_agcount, mp->m_fsname);
1061 ASSERT(0); 1060 ASSERT(0);
1062 return XFS_ERROR(EINVAL); 1061 return XFS_ERROR(EINVAL);
1063 } 1062 }
1064 agino = XFS_INO_TO_AGINO(mp, inode); 1063 agino = XFS_INO_TO_AGINO(mp, inode);
1065 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) { 1064 if (inode != XFS_AGINO_TO_INO(mp, agno, agino)) {
1066 cmn_err(CE_WARN, 1065 xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
1067 "xfs_difree: inode != XFS_AGINO_TO_INO() " 1066 __func__, (unsigned long long)inode,
1068 "(%llu != %llu) on %s. Returning EINVAL.", 1067 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino));
1069 (unsigned long long)inode,
1070 (unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino),
1071 mp->m_fsname);
1072 ASSERT(0); 1068 ASSERT(0);
1073 return XFS_ERROR(EINVAL); 1069 return XFS_ERROR(EINVAL);
1074 } 1070 }
1075 agbno = XFS_AGINO_TO_AGBNO(mp, agino); 1071 agbno = XFS_AGINO_TO_AGBNO(mp, agino);
1076 if (agbno >= mp->m_sb.sb_agblocks) { 1072 if (agbno >= mp->m_sb.sb_agblocks) {
1077 cmn_err(CE_WARN, 1073 xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
1078 "xfs_difree: agbno >= mp->m_sb.sb_agblocks (%d >= %d) on %s. Returning EINVAL.", 1074 __func__, agbno, mp->m_sb.sb_agblocks);
1079 agbno, mp->m_sb.sb_agblocks, mp->m_fsname);
1080 ASSERT(0); 1075 ASSERT(0);
1081 return XFS_ERROR(EINVAL); 1076 return XFS_ERROR(EINVAL);
1082 } 1077 }
@@ -1085,9 +1080,8 @@ xfs_difree(
1085 */ 1080 */
1086 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1081 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1087 if (error) { 1082 if (error) {
1088 cmn_err(CE_WARN, 1083 xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
1089 "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", 1084 __func__, error);
1090 error, mp->m_fsname);
1091 return error; 1085 return error;
1092 } 1086 }
1093 agi = XFS_BUF_TO_AGI(agbp); 1087 agi = XFS_BUF_TO_AGI(agbp);
@@ -1106,17 +1100,15 @@ xfs_difree(
1106 * Look for the entry describing this inode. 1100 * Look for the entry describing this inode.
1107 */ 1101 */
1108 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) { 1102 if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
1109 cmn_err(CE_WARN, 1103 xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
1110 "xfs_difree: xfs_inobt_lookup returned() an error %d on %s. Returning error.", 1104 __func__, error);
1111 error, mp->m_fsname);
1112 goto error0; 1105 goto error0;
1113 } 1106 }
1114 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1107 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1115 error = xfs_inobt_get_rec(cur, &rec, &i); 1108 error = xfs_inobt_get_rec(cur, &rec, &i);
1116 if (error) { 1109 if (error) {
1117 cmn_err(CE_WARN, 1110 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
1118 "xfs_difree: xfs_inobt_get_rec() returned an error %d on %s. Returning error.", 1111 __func__, error);
1119 error, mp->m_fsname);
1120 goto error0; 1112 goto error0;
1121 } 1113 }
1122 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1114 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
@@ -1157,8 +1149,8 @@ xfs_difree(
1157 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); 1149 xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
1158 1150
1159 if ((error = xfs_btree_delete(cur, &i))) { 1151 if ((error = xfs_btree_delete(cur, &i))) {
1160 cmn_err(CE_WARN, "xfs_difree: xfs_btree_delete returned an error %d on %s.\n", 1152 xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
1161 error, mp->m_fsname); 1153 __func__, error);
1162 goto error0; 1154 goto error0;
1163 } 1155 }
1164 1156
@@ -1170,9 +1162,8 @@ xfs_difree(
1170 1162
1171 error = xfs_inobt_update(cur, &rec); 1163 error = xfs_inobt_update(cur, &rec);
1172 if (error) { 1164 if (error) {
1173 cmn_err(CE_WARN, 1165 xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
1174 "xfs_difree: xfs_inobt_update returned an error %d on %s.", 1166 __func__, error);
1175 error, mp->m_fsname);
1176 goto error0; 1167 goto error0;
1177 } 1168 }
1178 1169
@@ -1218,10 +1209,9 @@ xfs_imap_lookup(
1218 1209
1219 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 1210 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
1220 if (error) { 1211 if (error) {
1221 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1212 xfs_alert(mp,
1222 "xfs_ialloc_read_agi() returned " 1213 "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
1223 "error %d, agno %d", 1214 __func__, error, agno);
1224 error, agno);
1225 return error; 1215 return error;
1226 } 1216 }
1227 1217
@@ -1299,24 +1289,21 @@ xfs_imap(
1299 if (flags & XFS_IGET_UNTRUSTED) 1289 if (flags & XFS_IGET_UNTRUSTED)
1300 return XFS_ERROR(EINVAL); 1290 return XFS_ERROR(EINVAL);
1301 if (agno >= mp->m_sb.sb_agcount) { 1291 if (agno >= mp->m_sb.sb_agcount) {
1302 xfs_fs_cmn_err(CE_ALERT, mp, 1292 xfs_alert(mp,
1303 "xfs_imap: agno (%d) >= " 1293 "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)",
1304 "mp->m_sb.sb_agcount (%d)", 1294 __func__, agno, mp->m_sb.sb_agcount);
1305 agno, mp->m_sb.sb_agcount);
1306 } 1295 }
1307 if (agbno >= mp->m_sb.sb_agblocks) { 1296 if (agbno >= mp->m_sb.sb_agblocks) {
1308 xfs_fs_cmn_err(CE_ALERT, mp, 1297 xfs_alert(mp,
1309 "xfs_imap: agbno (0x%llx) >= " 1298 "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
1310 "mp->m_sb.sb_agblocks (0x%lx)", 1299 __func__, (unsigned long long)agbno,
1311 (unsigned long long) agbno, 1300 (unsigned long)mp->m_sb.sb_agblocks);
1312 (unsigned long) mp->m_sb.sb_agblocks);
1313 } 1301 }
1314 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) { 1302 if (ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
1315 xfs_fs_cmn_err(CE_ALERT, mp, 1303 xfs_alert(mp,
1316 "xfs_imap: ino (0x%llx) != " 1304 "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
1317 "XFS_AGINO_TO_INO(mp, agno, agino) " 1305 __func__, ino,
1318 "(0x%llx)", 1306 XFS_AGINO_TO_INO(mp, agno, agino));
1319 ino, XFS_AGINO_TO_INO(mp, agno, agino));
1320 } 1307 }
1321 xfs_stack_trace(); 1308 xfs_stack_trace();
1322#endif /* DEBUG */ 1309#endif /* DEBUG */
@@ -1388,10 +1375,9 @@ out_map:
1388 */ 1375 */
1389 if ((imap->im_blkno + imap->im_len) > 1376 if ((imap->im_blkno + imap->im_len) >
1390 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { 1377 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
1391 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " 1378 xfs_alert(mp,
1392 "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " 1379 "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
1393 " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", 1380 __func__, (unsigned long long) imap->im_blkno,
1394 (unsigned long long) imap->im_blkno,
1395 (unsigned long long) imap->im_len, 1381 (unsigned long long) imap->im_len,
1396 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); 1382 XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
1397 return XFS_ERROR(EINVAL); 1383 return XFS_ERROR(EINVAL);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index be7cf625421f..da871f532236 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -110,8 +110,8 @@ xfs_inobp_check(
110 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 110 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
111 i * mp->m_sb.sb_inodesize); 111 i * mp->m_sb.sb_inodesize);
112 if (!dip->di_next_unlinked) { 112 if (!dip->di_next_unlinked) {
113 xfs_fs_cmn_err(CE_ALERT, mp, 113 xfs_alert(mp,
114 "Detected a bogus zero next_unlinked field in incore inode buffer 0x%p. About to pop an ASSERT.", 114 "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
115 bp); 115 bp);
116 ASSERT(dip->di_next_unlinked); 116 ASSERT(dip->di_next_unlinked);
117 } 117 }
@@ -142,10 +142,9 @@ xfs_imap_to_bp(
142 (int)imap->im_len, buf_flags, &bp); 142 (int)imap->im_len, buf_flags, &bp);
143 if (error) { 143 if (error) {
144 if (error != EAGAIN) { 144 if (error != EAGAIN) {
145 cmn_err(CE_WARN, 145 xfs_warn(mp,
146 "xfs_imap_to_bp: xfs_trans_read_buf()returned " 146 "%s: xfs_trans_read_buf() returned error %d.",
147 "an error %d on %s. Returning error.", 147 __func__, error);
148 error, mp->m_fsname);
149 } else { 148 } else {
150 ASSERT(buf_flags & XBF_TRYLOCK); 149 ASSERT(buf_flags & XBF_TRYLOCK);
151 } 150 }
@@ -180,12 +179,11 @@ xfs_imap_to_bp(
180 XFS_CORRUPTION_ERROR("xfs_imap_to_bp", 179 XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
181 XFS_ERRLEVEL_HIGH, mp, dip); 180 XFS_ERRLEVEL_HIGH, mp, dip);
182#ifdef DEBUG 181#ifdef DEBUG
183 cmn_err(CE_PANIC, 182 xfs_emerg(mp,
184 "Device %s - bad inode magic/vsn " 183 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
185 "daddr %lld #%d (magic=%x)",
186 XFS_BUFTARG_NAME(mp->m_ddev_targp),
187 (unsigned long long)imap->im_blkno, i, 184 (unsigned long long)imap->im_blkno, i,
188 be16_to_cpu(dip->di_magic)); 185 be16_to_cpu(dip->di_magic));
186 ASSERT(0);
189#endif 187#endif
190 xfs_trans_brelse(tp, bp); 188 xfs_trans_brelse(tp, bp);
191 return XFS_ERROR(EFSCORRUPTED); 189 return XFS_ERROR(EFSCORRUPTED);
@@ -317,7 +315,7 @@ xfs_iformat(
317 if (unlikely(be32_to_cpu(dip->di_nextents) + 315 if (unlikely(be32_to_cpu(dip->di_nextents) +
318 be16_to_cpu(dip->di_anextents) > 316 be16_to_cpu(dip->di_anextents) >
319 be64_to_cpu(dip->di_nblocks))) { 317 be64_to_cpu(dip->di_nblocks))) {
320 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 318 xfs_warn(ip->i_mount,
321 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.", 319 "corrupt dinode %Lu, extent total = %d, nblocks = %Lu.",
322 (unsigned long long)ip->i_ino, 320 (unsigned long long)ip->i_ino,
323 (int)(be32_to_cpu(dip->di_nextents) + 321 (int)(be32_to_cpu(dip->di_nextents) +
@@ -330,8 +328,7 @@ xfs_iformat(
330 } 328 }
331 329
332 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) { 330 if (unlikely(dip->di_forkoff > ip->i_mount->m_sb.sb_inodesize)) {
333 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 331 xfs_warn(ip->i_mount, "corrupt dinode %Lu, forkoff = 0x%x.",
334 "corrupt dinode %Lu, forkoff = 0x%x.",
335 (unsigned long long)ip->i_ino, 332 (unsigned long long)ip->i_ino,
336 dip->di_forkoff); 333 dip->di_forkoff);
337 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW, 334 XFS_CORRUPTION_ERROR("xfs_iformat(2)", XFS_ERRLEVEL_LOW,
@@ -341,7 +338,7 @@ xfs_iformat(
341 338
342 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) && 339 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
343 !ip->i_mount->m_rtdev_targp)) { 340 !ip->i_mount->m_rtdev_targp)) {
344 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 341 xfs_warn(ip->i_mount,
345 "corrupt dinode %Lu, has realtime flag set.", 342 "corrupt dinode %Lu, has realtime flag set.",
346 ip->i_ino); 343 ip->i_ino);
347 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)", 344 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
@@ -373,9 +370,8 @@ xfs_iformat(
373 * no local regular files yet 370 * no local regular files yet
374 */ 371 */
375 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) { 372 if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
376 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 373 xfs_warn(ip->i_mount,
377 "corrupt inode %Lu " 374 "corrupt inode %Lu (local format for regular file).",
378 "(local format for regular file).",
379 (unsigned long long) ip->i_ino); 375 (unsigned long long) ip->i_ino);
380 XFS_CORRUPTION_ERROR("xfs_iformat(4)", 376 XFS_CORRUPTION_ERROR("xfs_iformat(4)",
381 XFS_ERRLEVEL_LOW, 377 XFS_ERRLEVEL_LOW,
@@ -385,9 +381,8 @@ xfs_iformat(
385 381
386 di_size = be64_to_cpu(dip->di_size); 382 di_size = be64_to_cpu(dip->di_size);
387 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) { 383 if (unlikely(di_size > XFS_DFORK_DSIZE(dip, ip->i_mount))) {
388 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 384 xfs_warn(ip->i_mount,
389 "corrupt inode %Lu " 385 "corrupt inode %Lu (bad size %Ld for local inode).",
390 "(bad size %Ld for local inode).",
391 (unsigned long long) ip->i_ino, 386 (unsigned long long) ip->i_ino,
392 (long long) di_size); 387 (long long) di_size);
393 XFS_CORRUPTION_ERROR("xfs_iformat(5)", 388 XFS_CORRUPTION_ERROR("xfs_iformat(5)",
@@ -431,9 +426,8 @@ xfs_iformat(
431 size = be16_to_cpu(atp->hdr.totsize); 426 size = be16_to_cpu(atp->hdr.totsize);
432 427
433 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) { 428 if (unlikely(size < sizeof(struct xfs_attr_sf_hdr))) {
434 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 429 xfs_warn(ip->i_mount,
435 "corrupt inode %Lu " 430 "corrupt inode %Lu (bad attr fork size %Ld).",
436 "(bad attr fork size %Ld).",
437 (unsigned long long) ip->i_ino, 431 (unsigned long long) ip->i_ino,
438 (long long) size); 432 (long long) size);
439 XFS_CORRUPTION_ERROR("xfs_iformat(8)", 433 XFS_CORRUPTION_ERROR("xfs_iformat(8)",
@@ -488,9 +482,8 @@ xfs_iformat_local(
488 * kmem_alloc() or memcpy() below. 482 * kmem_alloc() or memcpy() below.
489 */ 483 */
490 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 484 if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
491 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 485 xfs_warn(ip->i_mount,
492 "corrupt inode %Lu " 486 "corrupt inode %Lu (bad size %d for local fork, size = %d).",
493 "(bad size %d for local fork, size = %d).",
494 (unsigned long long) ip->i_ino, size, 487 (unsigned long long) ip->i_ino, size,
495 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)); 488 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork));
496 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW, 489 XFS_CORRUPTION_ERROR("xfs_iformat_local", XFS_ERRLEVEL_LOW,
@@ -547,8 +540,7 @@ xfs_iformat_extents(
547 * kmem_alloc() or memcpy() below. 540 * kmem_alloc() or memcpy() below.
548 */ 541 */
549 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { 542 if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
550 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 543 xfs_warn(ip->i_mount, "corrupt inode %Lu ((a)extents = %d).",
551 "corrupt inode %Lu ((a)extents = %d).",
552 (unsigned long long) ip->i_ino, nex); 544 (unsigned long long) ip->i_ino, nex);
553 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW, 545 XFS_CORRUPTION_ERROR("xfs_iformat_extents(1)", XFS_ERRLEVEL_LOW,
554 ip->i_mount, dip); 546 ip->i_mount, dip);
@@ -623,11 +615,10 @@ xfs_iformat_btree(
623 || XFS_BMDR_SPACE_CALC(nrecs) > 615 || XFS_BMDR_SPACE_CALC(nrecs) >
624 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) 616 XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
625 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { 617 || XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
626 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount, 618 xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
627 "corrupt inode %Lu (btree).",
628 (unsigned long long) ip->i_ino); 619 (unsigned long long) ip->i_ino);
629 XFS_ERROR_REPORT("xfs_iformat_btree", XFS_ERRLEVEL_LOW, 620 XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
630 ip->i_mount); 621 ip->i_mount, dip);
631 return XFS_ERROR(EFSCORRUPTED); 622 return XFS_ERROR(EFSCORRUPTED);
632 } 623 }
633 624
@@ -813,11 +804,9 @@ xfs_iread(
813 */ 804 */
814 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { 805 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
815#ifdef DEBUG 806#ifdef DEBUG
816 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 807 xfs_alert(mp,
817 "dip->di_magic (0x%x) != " 808 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
818 "XFS_DINODE_MAGIC (0x%x)", 809 __func__, be16_to_cpu(dip->di_magic), XFS_DINODE_MAGIC);
819 be16_to_cpu(dip->di_magic),
820 XFS_DINODE_MAGIC);
821#endif /* DEBUG */ 810#endif /* DEBUG */
822 error = XFS_ERROR(EINVAL); 811 error = XFS_ERROR(EINVAL);
823 goto out_brelse; 812 goto out_brelse;
@@ -835,9 +824,8 @@ xfs_iread(
835 error = xfs_iformat(ip, dip); 824 error = xfs_iformat(ip, dip);
836 if (error) { 825 if (error) {
837#ifdef DEBUG 826#ifdef DEBUG
838 xfs_fs_cmn_err(CE_ALERT, mp, "xfs_iread: " 827 xfs_alert(mp, "%s: xfs_iformat() returned error %d",
839 "xfs_iformat() returned error %d", 828 __func__, error);
840 error);
841#endif /* DEBUG */ 829#endif /* DEBUG */
842 goto out_brelse; 830 goto out_brelse;
843 } 831 }
@@ -1016,8 +1004,8 @@ xfs_ialloc(
1016 * This is because we're setting fields here we need 1004 * This is because we're setting fields here we need
1017 * to prevent others from looking at until we're done. 1005 * to prevent others from looking at until we're done.
1018 */ 1006 */
1019 error = xfs_trans_iget(tp->t_mountp, tp, ino, 1007 error = xfs_iget(tp->t_mountp, tp, ino, XFS_IGET_CREATE,
1020 XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); 1008 XFS_ILOCK_EXCL, &ip);
1021 if (error) 1009 if (error)
1022 return error; 1010 return error;
1023 ASSERT(ip != NULL); 1011 ASSERT(ip != NULL);
@@ -1166,6 +1154,7 @@ xfs_ialloc(
1166 /* 1154 /*
1167 * Log the new values stuffed into the inode. 1155 * Log the new values stuffed into the inode.
1168 */ 1156 */
1157 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
1169 xfs_trans_log_inode(tp, ip, flags); 1158 xfs_trans_log_inode(tp, ip, flags);
1170 1159
1171 /* now that we have an i_mode we can setup inode ops and unlock */ 1160 /* now that we have an i_mode we can setup inode ops and unlock */
@@ -1820,9 +1809,8 @@ xfs_iunlink_remove(
1820 */ 1809 */
1821 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 1810 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1822 if (error) { 1811 if (error) {
1823 cmn_err(CE_WARN, 1812 xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
1824 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1813 __func__, error);
1825 error, mp->m_fsname);
1826 return error; 1814 return error;
1827 } 1815 }
1828 next_agino = be32_to_cpu(dip->di_next_unlinked); 1816 next_agino = be32_to_cpu(dip->di_next_unlinked);
@@ -1867,9 +1855,9 @@ xfs_iunlink_remove(
1867 error = xfs_inotobp(mp, tp, next_ino, &last_dip, 1855 error = xfs_inotobp(mp, tp, next_ino, &last_dip,
1868 &last_ibp, &last_offset, 0); 1856 &last_ibp, &last_offset, 0);
1869 if (error) { 1857 if (error) {
1870 cmn_err(CE_WARN, 1858 xfs_warn(mp,
1871 "xfs_iunlink_remove: xfs_inotobp() returned an error %d on %s. Returning error.", 1859 "%s: xfs_inotobp() returned error %d.",
1872 error, mp->m_fsname); 1860 __func__, error);
1873 return error; 1861 return error;
1874 } 1862 }
1875 next_agino = be32_to_cpu(last_dip->di_next_unlinked); 1863 next_agino = be32_to_cpu(last_dip->di_next_unlinked);
@@ -1882,9 +1870,8 @@ xfs_iunlink_remove(
1882 */ 1870 */
1883 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK); 1871 error = xfs_itobp(mp, tp, ip, &dip, &ibp, XBF_LOCK);
1884 if (error) { 1872 if (error) {
1885 cmn_err(CE_WARN, 1873 xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
1886 "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", 1874 __func__, error);
1887 error, mp->m_fsname);
1888 return error; 1875 return error;
1889 } 1876 }
1890 next_agino = be32_to_cpu(dip->di_next_unlinked); 1877 next_agino = be32_to_cpu(dip->di_next_unlinked);
@@ -2939,16 +2926,16 @@ xfs_iflush_int(
2939 2926
2940 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, 2927 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
2941 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2928 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2942 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2929 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2943 "xfs_iflush: Bad inode %Lu magic number 0x%x, ptr 0x%p", 2930 "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
2944 ip->i_ino, be16_to_cpu(dip->di_magic), dip); 2931 __func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
2945 goto corrupt_out; 2932 goto corrupt_out;
2946 } 2933 }
2947 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC, 2934 if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
2948 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) { 2935 mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
2949 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2936 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2950 "xfs_iflush: Bad inode %Lu, ptr 0x%p, magic number 0x%x", 2937 "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
2951 ip->i_ino, ip, ip->i_d.di_magic); 2938 __func__, ip->i_ino, ip, ip->i_d.di_magic);
2952 goto corrupt_out; 2939 goto corrupt_out;
2953 } 2940 }
2954 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { 2941 if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
@@ -2956,9 +2943,9 @@ xfs_iflush_int(
2956 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) && 2943 (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
2957 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE), 2944 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
2958 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) { 2945 mp, XFS_ERRTAG_IFLUSH_3, XFS_RANDOM_IFLUSH_3)) {
2959 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2946 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2960 "xfs_iflush: Bad regular inode %Lu, ptr 0x%p", 2947 "%s: Bad regular inode %Lu, ptr 0x%p",
2961 ip->i_ino, ip); 2948 __func__, ip->i_ino, ip);
2962 goto corrupt_out; 2949 goto corrupt_out;
2963 } 2950 }
2964 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { 2951 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
@@ -2967,28 +2954,28 @@ xfs_iflush_int(
2967 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) && 2954 (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
2968 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL), 2955 (ip->i_d.di_format != XFS_DINODE_FMT_LOCAL),
2969 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) { 2956 mp, XFS_ERRTAG_IFLUSH_4, XFS_RANDOM_IFLUSH_4)) {
2970 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2957 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2971 "xfs_iflush: Bad directory inode %Lu, ptr 0x%p", 2958 "%s: Bad directory inode %Lu, ptr 0x%p",
2972 ip->i_ino, ip); 2959 __func__, ip->i_ino, ip);
2973 goto corrupt_out; 2960 goto corrupt_out;
2974 } 2961 }
2975 } 2962 }
2976 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents > 2963 if (XFS_TEST_ERROR(ip->i_d.di_nextents + ip->i_d.di_anextents >
2977 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5, 2964 ip->i_d.di_nblocks, mp, XFS_ERRTAG_IFLUSH_5,
2978 XFS_RANDOM_IFLUSH_5)) { 2965 XFS_RANDOM_IFLUSH_5)) {
2979 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2966 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2980 "xfs_iflush: detected corrupt incore inode %Lu, total extents = %d, nblocks = %Ld, ptr 0x%p", 2967 "%s: detected corrupt incore inode %Lu, "
2981 ip->i_ino, 2968 "total extents = %d, nblocks = %Ld, ptr 0x%p",
2969 __func__, ip->i_ino,
2982 ip->i_d.di_nextents + ip->i_d.di_anextents, 2970 ip->i_d.di_nextents + ip->i_d.di_anextents,
2983 ip->i_d.di_nblocks, 2971 ip->i_d.di_nblocks, ip);
2984 ip);
2985 goto corrupt_out; 2972 goto corrupt_out;
2986 } 2973 }
2987 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize, 2974 if (XFS_TEST_ERROR(ip->i_d.di_forkoff > mp->m_sb.sb_inodesize,
2988 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) { 2975 mp, XFS_ERRTAG_IFLUSH_6, XFS_RANDOM_IFLUSH_6)) {
2989 xfs_cmn_err(XFS_PTAG_IFLUSH, CE_ALERT, mp, 2976 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2990 "xfs_iflush: bad inode %Lu, forkoff 0x%x, ptr 0x%p", 2977 "%s: bad inode %Lu, forkoff 0x%x, ptr 0x%p",
2991 ip->i_ino, ip->i_d.di_forkoff, ip); 2978 __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
2992 goto corrupt_out; 2979 goto corrupt_out;
2993 } 2980 }
2994 /* 2981 /*
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 5c95fa8ec11d..f753200cef8d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -409,28 +409,35 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
409/* 409/*
410 * Flags for lockdep annotations. 410 * Flags for lockdep annotations.
411 * 411 *
412 * XFS_I[O]LOCK_PARENT - for operations that require locking two inodes 412 * XFS_LOCK_PARENT - for directory operations that require locking a
413 * (ie directory operations that require locking a directory inode and 413 * parent directory inode and a child entry inode. The parent gets locked
414 * an entry inode). The first inode gets locked with this flag so it 414 * with this flag so it gets a lockdep subclass of 1 and the child entry
415 * gets a lockdep subclass of 1 and the second lock will have a lockdep 415 * lock will have a lockdep subclass of 0.
416 * subclass of 0. 416 *
417 * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
418 * inodes do not participate in the normal lock order, and thus have their
419 * own subclasses.
417 * 420 *
418 * XFS_LOCK_INUMORDER - for locking several inodes at the some time 421 * XFS_LOCK_INUMORDER - for locking several inodes at the some time
419 * with xfs_lock_inodes(). This flag is used as the starting subclass 422 * with xfs_lock_inodes(). This flag is used as the starting subclass
420 * and each subsequent lock acquired will increment the subclass by one. 423 * and each subsequent lock acquired will increment the subclass by one.
421 * So the first lock acquired will have a lockdep subclass of 2, the 424 * So the first lock acquired will have a lockdep subclass of 4, the
422 * second lock will have a lockdep subclass of 3, and so on. It is 425 * second lock will have a lockdep subclass of 5, and so on. It is
423 * the responsibility of the class builder to shift this to the correct 426 * the responsibility of the class builder to shift this to the correct
424 * portion of the lock_mode lockdep mask. 427 * portion of the lock_mode lockdep mask.
425 */ 428 */
426#define XFS_LOCK_PARENT 1 429#define XFS_LOCK_PARENT 1
427#define XFS_LOCK_INUMORDER 2 430#define XFS_LOCK_RTBITMAP 2
431#define XFS_LOCK_RTSUM 3
432#define XFS_LOCK_INUMORDER 4
428 433
429#define XFS_IOLOCK_SHIFT 16 434#define XFS_IOLOCK_SHIFT 16
430#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) 435#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
431 436
432#define XFS_ILOCK_SHIFT 24 437#define XFS_ILOCK_SHIFT 24
433#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) 438#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
439#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
440#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
434 441
435#define XFS_IOLOCK_DEP_MASK 0x00ff0000 442#define XFS_IOLOCK_DEP_MASK 0x00ff0000
436#define XFS_ILOCK_DEP_MASK 0xff000000 443#define XFS_ILOCK_DEP_MASK 0xff000000
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 8a0f044750c3..091d82b94c4d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -101,11 +101,11 @@ xfs_iomap_eof_align_last_fsb(
101} 101}
102 102
103STATIC int 103STATIC int
104xfs_cmn_err_fsblock_zero( 104xfs_alert_fsblock_zero(
105 xfs_inode_t *ip, 105 xfs_inode_t *ip,
106 xfs_bmbt_irec_t *imap) 106 xfs_bmbt_irec_t *imap)
107{ 107{
108 xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount, 108 xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
109 "Access to block zero in inode %llu " 109 "Access to block zero in inode %llu "
110 "start_block: %llx start_off: %llx " 110 "start_block: %llx start_off: %llx "
111 "blkcnt: %llx extent-state: %x\n", 111 "blkcnt: %llx extent-state: %x\n",
@@ -246,7 +246,7 @@ xfs_iomap_write_direct(
246 } 246 }
247 247
248 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) { 248 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) {
249 error = xfs_cmn_err_fsblock_zero(ip, imap); 249 error = xfs_alert_fsblock_zero(ip, imap);
250 goto error_out; 250 goto error_out;
251 } 251 }
252 252
@@ -464,7 +464,7 @@ retry:
464 } 464 }
465 465
466 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip))) 466 if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
467 return xfs_cmn_err_fsblock_zero(ip, &imap[0]); 467 return xfs_alert_fsblock_zero(ip, &imap[0]);
468 468
469 *ret_imap = imap[0]; 469 *ret_imap = imap[0];
470 return 0; 470 return 0;
@@ -614,7 +614,7 @@ xfs_iomap_write_allocate(
614 * covers at least part of the callers request 614 * covers at least part of the callers request
615 */ 615 */
616 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip))) 616 if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
617 return xfs_cmn_err_fsblock_zero(ip, imap); 617 return xfs_alert_fsblock_zero(ip, imap);
618 618
619 if ((offset_fsb >= imap->br_startoff) && 619 if ((offset_fsb >= imap->br_startoff) &&
620 (offset_fsb < (imap->br_startoff + 620 (offset_fsb < (imap->br_startoff +
@@ -724,7 +724,7 @@ xfs_iomap_write_unwritten(
724 return XFS_ERROR(error); 724 return XFS_ERROR(error);
725 725
726 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) 726 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
727 return xfs_cmn_err_fsblock_zero(ip, &imap); 727 return xfs_alert_fsblock_zero(ip, &imap);
728 728
729 if ((numblks_fsb = imap.br_blockcount) == 0) { 729 if ((numblks_fsb = imap.br_blockcount) == 0) {
730 /* 730 /*
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ae6fef1ff563..25efa9b8a602 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -374,11 +374,10 @@ xfs_log_mount(
374 int error; 374 int error;
375 375
376 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) 376 if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
377 cmn_err(CE_NOTE, "XFS mounting filesystem %s", mp->m_fsname); 377 xfs_notice(mp, "Mounting Filesystem");
378 else { 378 else {
379 cmn_err(CE_NOTE, 379 xfs_notice(mp,
380 "Mounting filesystem \"%s\" in no-recovery mode. Filesystem will be inconsistent.", 380"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent.");
381 mp->m_fsname);
382 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 381 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
383 } 382 }
384 383
@@ -393,7 +392,7 @@ xfs_log_mount(
393 */ 392 */
394 error = xfs_trans_ail_init(mp); 393 error = xfs_trans_ail_init(mp);
395 if (error) { 394 if (error) {
396 cmn_err(CE_WARN, "XFS: AIL initialisation failed: error %d", error); 395 xfs_warn(mp, "AIL initialisation failed: error %d", error);
397 goto out_free_log; 396 goto out_free_log;
398 } 397 }
399 mp->m_log->l_ailp = mp->m_ail; 398 mp->m_log->l_ailp = mp->m_ail;
@@ -413,7 +412,8 @@ xfs_log_mount(
413 if (readonly) 412 if (readonly)
414 mp->m_flags |= XFS_MOUNT_RDONLY; 413 mp->m_flags |= XFS_MOUNT_RDONLY;
415 if (error) { 414 if (error) {
416 cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error); 415 xfs_warn(mp, "log mount/recovery failed: error %d",
416 error);
417 goto out_destroy_ail; 417 goto out_destroy_ail;
418 } 418 }
419 } 419 }
@@ -542,10 +542,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
542 */ 542 */
543 } 543 }
544 544
545 if (error) { 545 if (error)
546 xfs_fs_cmn_err(CE_ALERT, mp, 546 xfs_alert(mp, "%s: unmount record failed", __func__);
547 "xfs_log_unmount: unmount record failed");
548 }
549 547
550 548
551 spin_lock(&log->l_icloglock); 549 spin_lock(&log->l_icloglock);
@@ -852,7 +850,7 @@ xlog_space_left(
852 * In this case we just want to return the size of the 850 * In this case we just want to return the size of the
853 * log as the amount of space left. 851 * log as the amount of space left.
854 */ 852 */
855 xfs_fs_cmn_err(CE_ALERT, log->l_mp, 853 xfs_alert(log->l_mp,
856 "xlog_space_left: head behind tail\n" 854 "xlog_space_left: head behind tail\n"
857 " tail_cycle = %d, tail_bytes = %d\n" 855 " tail_cycle = %d, tail_bytes = %d\n"
858 " GH cycle = %d, GH bytes = %d", 856 " GH cycle = %d, GH bytes = %d",
@@ -1001,7 +999,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1001 999
1002 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); 1000 log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
1003 if (!log) { 1001 if (!log) {
1004 xlog_warn("XFS: Log allocation failed: No memory!"); 1002 xfs_warn(mp, "Log allocation failed: No memory!");
1005 goto out; 1003 goto out;
1006 } 1004 }
1007 1005
@@ -1029,24 +1027,24 @@ xlog_alloc_log(xfs_mount_t *mp,
1029 if (xfs_sb_version_hassector(&mp->m_sb)) { 1027 if (xfs_sb_version_hassector(&mp->m_sb)) {
1030 log2_size = mp->m_sb.sb_logsectlog; 1028 log2_size = mp->m_sb.sb_logsectlog;
1031 if (log2_size < BBSHIFT) { 1029 if (log2_size < BBSHIFT) {
1032 xlog_warn("XFS: Log sector size too small " 1030 xfs_warn(mp, "Log sector size too small (0x%x < 0x%x)",
1033 "(0x%x < 0x%x)", log2_size, BBSHIFT); 1031 log2_size, BBSHIFT);
1034 goto out_free_log; 1032 goto out_free_log;
1035 } 1033 }
1036 1034
1037 log2_size -= BBSHIFT; 1035 log2_size -= BBSHIFT;
1038 if (log2_size > mp->m_sectbb_log) { 1036 if (log2_size > mp->m_sectbb_log) {
1039 xlog_warn("XFS: Log sector size too large " 1037 xfs_warn(mp, "Log sector size too large (0x%x > 0x%x)",
1040 "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log); 1038 log2_size, mp->m_sectbb_log);
1041 goto out_free_log; 1039 goto out_free_log;
1042 } 1040 }
1043 1041
1044 /* for larger sector sizes, must have v2 or external log */ 1042 /* for larger sector sizes, must have v2 or external log */
1045 if (log2_size && log->l_logBBstart > 0 && 1043 if (log2_size && log->l_logBBstart > 0 &&
1046 !xfs_sb_version_haslogv2(&mp->m_sb)) { 1044 !xfs_sb_version_haslogv2(&mp->m_sb)) {
1047 1045 xfs_warn(mp,
1048 xlog_warn("XFS: log sector size (0x%x) invalid " 1046 "log sector size (0x%x) invalid for configuration.",
1049 "for configuration.", log2_size); 1047 log2_size);
1050 goto out_free_log; 1048 goto out_free_log;
1051 } 1049 }
1052 } 1050 }
@@ -1563,38 +1561,36 @@ xlog_print_tic_res(
1563 "SWAPEXT" 1561 "SWAPEXT"
1564 }; 1562 };
1565 1563
1566 xfs_fs_cmn_err(CE_WARN, mp, 1564 xfs_warn(mp,
1567 "xfs_log_write: reservation summary:\n" 1565 "xfs_log_write: reservation summary:\n"
1568 " trans type = %s (%u)\n" 1566 " trans type = %s (%u)\n"
1569 " unit res = %d bytes\n" 1567 " unit res = %d bytes\n"
1570 " current res = %d bytes\n" 1568 " current res = %d bytes\n"
1571 " total reg = %u bytes (o/flow = %u bytes)\n" 1569 " total reg = %u bytes (o/flow = %u bytes)\n"
1572 " ophdrs = %u (ophdr space = %u bytes)\n" 1570 " ophdrs = %u (ophdr space = %u bytes)\n"
1573 " ophdr + reg = %u bytes\n" 1571 " ophdr + reg = %u bytes\n"
1574 " num regions = %u\n", 1572 " num regions = %u\n",
1575 ((ticket->t_trans_type <= 0 || 1573 ((ticket->t_trans_type <= 0 ||
1576 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ? 1574 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
1577 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]), 1575 "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
1578 ticket->t_trans_type, 1576 ticket->t_trans_type,
1579 ticket->t_unit_res, 1577 ticket->t_unit_res,
1580 ticket->t_curr_res, 1578 ticket->t_curr_res,
1581 ticket->t_res_arr_sum, ticket->t_res_o_flow, 1579 ticket->t_res_arr_sum, ticket->t_res_o_flow,
1582 ticket->t_res_num_ophdrs, ophdr_spc, 1580 ticket->t_res_num_ophdrs, ophdr_spc,
1583 ticket->t_res_arr_sum + 1581 ticket->t_res_arr_sum +
1584 ticket->t_res_o_flow + ophdr_spc, 1582 ticket->t_res_o_flow + ophdr_spc,
1585 ticket->t_res_num); 1583 ticket->t_res_num);
1586 1584
1587 for (i = 0; i < ticket->t_res_num; i++) { 1585 for (i = 0; i < ticket->t_res_num; i++) {
1588 uint r_type = ticket->t_res_arr[i].r_type; 1586 uint r_type = ticket->t_res_arr[i].r_type;
1589 cmn_err(CE_WARN, 1587 xfs_warn(mp, "region[%u]: %s - %u bytes\n", i,
1590 "region[%u]: %s - %u bytes\n",
1591 i,
1592 ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? 1588 ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
1593 "bad-rtype" : res_type_str[r_type-1]), 1589 "bad-rtype" : res_type_str[r_type-1]),
1594 ticket->t_res_arr[i].r_len); 1590 ticket->t_res_arr[i].r_len);
1595 } 1591 }
1596 1592
1597 xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp, 1593 xfs_alert_tag(mp, XFS_PTAG_LOGRES,
1598 "xfs_log_write: reservation ran out. Need to up reservation"); 1594 "xfs_log_write: reservation ran out. Need to up reservation");
1599 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1595 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1600} 1596}
@@ -1682,7 +1678,7 @@ xlog_write_setup_ophdr(
1682 case XFS_LOG: 1678 case XFS_LOG:
1683 break; 1679 break;
1684 default: 1680 default:
1685 xfs_fs_cmn_err(CE_WARN, log->l_mp, 1681 xfs_warn(log->l_mp,
1686 "Bad XFS transaction clientid 0x%x in ticket 0x%p", 1682 "Bad XFS transaction clientid 0x%x in ticket 0x%p",
1687 ophdr->oh_clientid, ticket); 1683 ophdr->oh_clientid, ticket);
1688 return NULL; 1684 return NULL;
@@ -2264,7 +2260,7 @@ xlog_state_do_callback(
2264 if (repeats > 5000) { 2260 if (repeats > 5000) {
2265 flushcnt += repeats; 2261 flushcnt += repeats;
2266 repeats = 0; 2262 repeats = 0;
2267 xfs_fs_cmn_err(CE_WARN, log->l_mp, 2263 xfs_warn(log->l_mp,
2268 "%s: possible infinite loop (%d iterations)", 2264 "%s: possible infinite loop (%d iterations)",
2269 __func__, flushcnt); 2265 __func__, flushcnt);
2270 } 2266 }
@@ -3052,10 +3048,8 @@ xfs_log_force(
3052 int error; 3048 int error;
3053 3049
3054 error = _xfs_log_force(mp, flags, NULL); 3050 error = _xfs_log_force(mp, flags, NULL);
3055 if (error) { 3051 if (error)
3056 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " 3052 xfs_warn(mp, "%s: error %d returned.", __func__, error);
3057 "error %d returned.", error);
3058 }
3059} 3053}
3060 3054
3061/* 3055/*
@@ -3204,10 +3198,8 @@ xfs_log_force_lsn(
3204 int error; 3198 int error;
3205 3199
3206 error = _xfs_log_force_lsn(mp, lsn, flags, NULL); 3200 error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
3207 if (error) { 3201 if (error)
3208 xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: " 3202 xfs_warn(mp, "%s: error %d returned.", __func__, error);
3209 "error %d returned.", error);
3210 }
3211} 3203}
3212 3204
3213/* 3205/*
@@ -3412,7 +3404,7 @@ xlog_verify_dest_ptr(
3412 } 3404 }
3413 3405
3414 if (!good_ptr) 3406 if (!good_ptr)
3415 xlog_panic("xlog_verify_dest_ptr: invalid ptr"); 3407 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
3416} 3408}
3417 3409
3418STATIC void 3410STATIC void
@@ -3448,16 +3440,16 @@ xlog_verify_tail_lsn(xlog_t *log,
3448 blocks = 3440 blocks =
3449 log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn)); 3441 log->l_logBBsize - (log->l_prev_block - BLOCK_LSN(tail_lsn));
3450 if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize)) 3442 if (blocks < BTOBB(iclog->ic_offset)+BTOBB(log->l_iclog_hsize))
3451 xlog_panic("xlog_verify_tail_lsn: ran out of log space"); 3443 xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
3452 } else { 3444 } else {
3453 ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle); 3445 ASSERT(CYCLE_LSN(tail_lsn)+1 == log->l_prev_cycle);
3454 3446
3455 if (BLOCK_LSN(tail_lsn) == log->l_prev_block) 3447 if (BLOCK_LSN(tail_lsn) == log->l_prev_block)
3456 xlog_panic("xlog_verify_tail_lsn: tail wrapped"); 3448 xfs_emerg(log->l_mp, "%s: tail wrapped", __func__);
3457 3449
3458 blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block; 3450 blocks = BLOCK_LSN(tail_lsn) - log->l_prev_block;
3459 if (blocks < BTOBB(iclog->ic_offset) + 1) 3451 if (blocks < BTOBB(iclog->ic_offset) + 1)
3460 xlog_panic("xlog_verify_tail_lsn: ran out of log space"); 3452 xfs_emerg(log->l_mp, "%s: ran out of log space", __func__);
3461 } 3453 }
3462} /* xlog_verify_tail_lsn */ 3454} /* xlog_verify_tail_lsn */
3463 3455
@@ -3497,22 +3489,23 @@ xlog_verify_iclog(xlog_t *log,
3497 icptr = log->l_iclog; 3489 icptr = log->l_iclog;
3498 for (i=0; i < log->l_iclog_bufs; i++) { 3490 for (i=0; i < log->l_iclog_bufs; i++) {
3499 if (icptr == NULL) 3491 if (icptr == NULL)
3500 xlog_panic("xlog_verify_iclog: invalid ptr"); 3492 xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
3501 icptr = icptr->ic_next; 3493 icptr = icptr->ic_next;
3502 } 3494 }
3503 if (icptr != log->l_iclog) 3495 if (icptr != log->l_iclog)
3504 xlog_panic("xlog_verify_iclog: corrupt iclog ring"); 3496 xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
3505 spin_unlock(&log->l_icloglock); 3497 spin_unlock(&log->l_icloglock);
3506 3498
3507 /* check log magic numbers */ 3499 /* check log magic numbers */
3508 if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) 3500 if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM)
3509 xlog_panic("xlog_verify_iclog: invalid magic num"); 3501 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
3510 3502
3511 ptr = (xfs_caddr_t) &iclog->ic_header; 3503 ptr = (xfs_caddr_t) &iclog->ic_header;
3512 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; 3504 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
3513 ptr += BBSIZE) { 3505 ptr += BBSIZE) {
3514 if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) 3506 if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
3515 xlog_panic("xlog_verify_iclog: unexpected magic num"); 3507 xfs_emerg(log->l_mp, "%s: unexpected magic num",
3508 __func__);
3516 } 3509 }
3517 3510
3518 /* check fields */ 3511 /* check fields */
@@ -3542,9 +3535,10 @@ xlog_verify_iclog(xlog_t *log,
3542 } 3535 }
3543 } 3536 }
3544 if (clientid != XFS_TRANSACTION && clientid != XFS_LOG) 3537 if (clientid != XFS_TRANSACTION && clientid != XFS_LOG)
3545 cmn_err(CE_WARN, "xlog_verify_iclog: " 3538 xfs_warn(log->l_mp,
3546 "invalid clientid %d op 0x%p offset 0x%lx", 3539 "%s: invalid clientid %d op 0x%p offset 0x%lx",
3547 clientid, ophead, (unsigned long)field_offset); 3540 __func__, clientid, ophead,
3541 (unsigned long)field_offset);
3548 3542
3549 /* check length */ 3543 /* check length */
3550 field_offset = (__psint_t) 3544 field_offset = (__psint_t)
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index d5f8be8f4bf6..15dbf1f9c2be 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -87,10 +87,6 @@ static inline uint xlog_get_client_id(__be32 i)
87 return be32_to_cpu(i) >> 24; 87 return be32_to_cpu(i) >> 24;
88} 88}
89 89
90#define xlog_panic(args...) cmn_err(CE_PANIC, ## args)
91#define xlog_exit(args...) cmn_err(CE_PANIC, ## args)
92#define xlog_warn(args...) cmn_err(CE_WARN, ## args)
93
94/* 90/*
95 * In core log state 91 * In core log state
96 */ 92 */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index aa0ebb776903..0c4a5618e7af 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -92,7 +92,7 @@ xlog_get_bp(
92 int nbblks) 92 int nbblks)
93{ 93{
94 if (!xlog_buf_bbcount_valid(log, nbblks)) { 94 if (!xlog_buf_bbcount_valid(log, nbblks)) {
95 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", 95 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
96 nbblks); 96 nbblks);
97 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 97 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
98 return NULL; 98 return NULL;
@@ -160,7 +160,7 @@ xlog_bread_noalign(
160 int error; 160 int error;
161 161
162 if (!xlog_buf_bbcount_valid(log, nbblks)) { 162 if (!xlog_buf_bbcount_valid(log, nbblks)) {
163 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", 163 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
164 nbblks); 164 nbblks);
165 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 165 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
166 return EFSCORRUPTED; 166 return EFSCORRUPTED;
@@ -219,7 +219,7 @@ xlog_bwrite(
219 int error; 219 int error;
220 220
221 if (!xlog_buf_bbcount_valid(log, nbblks)) { 221 if (!xlog_buf_bbcount_valid(log, nbblks)) {
222 xlog_warn("XFS: Invalid block length (0x%x) given for buffer", 222 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
223 nbblks); 223 nbblks);
224 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 224 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
225 return EFSCORRUPTED; 225 return EFSCORRUPTED;
@@ -254,9 +254,9 @@ xlog_header_check_dump(
254 xfs_mount_t *mp, 254 xfs_mount_t *mp,
255 xlog_rec_header_t *head) 255 xlog_rec_header_t *head)
256{ 256{
257 cmn_err(CE_DEBUG, "%s: SB : uuid = %pU, fmt = %d\n", 257 xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d\n",
258 __func__, &mp->m_sb.sb_uuid, XLOG_FMT); 258 __func__, &mp->m_sb.sb_uuid, XLOG_FMT);
259 cmn_err(CE_DEBUG, " log : uuid = %pU, fmt = %d\n", 259 xfs_debug(mp, " log : uuid = %pU, fmt = %d\n",
260 &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); 260 &head->h_fs_uuid, be32_to_cpu(head->h_fmt));
261} 261}
262#else 262#else
@@ -279,15 +279,15 @@ xlog_header_check_recover(
279 * a dirty log created in IRIX. 279 * a dirty log created in IRIX.
280 */ 280 */
281 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { 281 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
282 xlog_warn( 282 xfs_warn(mp,
283 "XFS: dirty log written in incompatible format - can't recover"); 283 "dirty log written in incompatible format - can't recover");
284 xlog_header_check_dump(mp, head); 284 xlog_header_check_dump(mp, head);
285 XFS_ERROR_REPORT("xlog_header_check_recover(1)", 285 XFS_ERROR_REPORT("xlog_header_check_recover(1)",
286 XFS_ERRLEVEL_HIGH, mp); 286 XFS_ERRLEVEL_HIGH, mp);
287 return XFS_ERROR(EFSCORRUPTED); 287 return XFS_ERROR(EFSCORRUPTED);
288 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 288 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
289 xlog_warn( 289 xfs_warn(mp,
290 "XFS: dirty log entry has mismatched uuid - can't recover"); 290 "dirty log entry has mismatched uuid - can't recover");
291 xlog_header_check_dump(mp, head); 291 xlog_header_check_dump(mp, head);
292 XFS_ERROR_REPORT("xlog_header_check_recover(2)", 292 XFS_ERROR_REPORT("xlog_header_check_recover(2)",
293 XFS_ERRLEVEL_HIGH, mp); 293 XFS_ERRLEVEL_HIGH, mp);
@@ -312,9 +312,9 @@ xlog_header_check_mount(
312 * h_fs_uuid is nil, we assume this log was last mounted 312 * h_fs_uuid is nil, we assume this log was last mounted
313 * by IRIX and continue. 313 * by IRIX and continue.
314 */ 314 */
315 xlog_warn("XFS: nil uuid in log - IRIX style log"); 315 xfs_warn(mp, "nil uuid in log - IRIX style log");
316 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { 316 } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
317 xlog_warn("XFS: log has mismatched uuid - can't recover"); 317 xfs_warn(mp, "log has mismatched uuid - can't recover");
318 xlog_header_check_dump(mp, head); 318 xlog_header_check_dump(mp, head);
319 XFS_ERROR_REPORT("xlog_header_check_mount", 319 XFS_ERROR_REPORT("xlog_header_check_mount",
320 XFS_ERRLEVEL_HIGH, mp); 320 XFS_ERRLEVEL_HIGH, mp);
@@ -490,8 +490,8 @@ xlog_find_verify_log_record(
490 for (i = (*last_blk) - 1; i >= 0; i--) { 490 for (i = (*last_blk) - 1; i >= 0; i--) {
491 if (i < start_blk) { 491 if (i < start_blk) {
492 /* valid log record not found */ 492 /* valid log record not found */
493 xlog_warn( 493 xfs_warn(log->l_mp,
494 "XFS: Log inconsistent (didn't find previous header)"); 494 "Log inconsistent (didn't find previous header)");
495 ASSERT(0); 495 ASSERT(0);
496 error = XFS_ERROR(EIO); 496 error = XFS_ERROR(EIO);
497 goto out; 497 goto out;
@@ -591,12 +591,12 @@ xlog_find_head(
591 * mkfs etc write a dummy unmount record to a fresh 591 * mkfs etc write a dummy unmount record to a fresh
592 * log so we can store the uuid in there 592 * log so we can store the uuid in there
593 */ 593 */
594 xlog_warn("XFS: totally zeroed log"); 594 xfs_warn(log->l_mp, "totally zeroed log");
595 } 595 }
596 596
597 return 0; 597 return 0;
598 } else if (error) { 598 } else if (error) {
599 xlog_warn("XFS: empty log check failed"); 599 xfs_warn(log->l_mp, "empty log check failed");
600 return error; 600 return error;
601 } 601 }
602 602
@@ -819,7 +819,7 @@ validate_head:
819 xlog_put_bp(bp); 819 xlog_put_bp(bp);
820 820
821 if (error) 821 if (error)
822 xlog_warn("XFS: failed to find log head"); 822 xfs_warn(log->l_mp, "failed to find log head");
823 return error; 823 return error;
824} 824}
825 825
@@ -912,7 +912,7 @@ xlog_find_tail(
912 } 912 }
913 } 913 }
914 if (!found) { 914 if (!found) {
915 xlog_warn("XFS: xlog_find_tail: couldn't find sync record"); 915 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
916 ASSERT(0); 916 ASSERT(0);
917 return XFS_ERROR(EIO); 917 return XFS_ERROR(EIO);
918 } 918 }
@@ -1028,7 +1028,7 @@ done:
1028 xlog_put_bp(bp); 1028 xlog_put_bp(bp);
1029 1029
1030 if (error) 1030 if (error)
1031 xlog_warn("XFS: failed to locate log tail"); 1031 xfs_warn(log->l_mp, "failed to locate log tail");
1032 return error; 1032 return error;
1033} 1033}
1034 1034
@@ -1092,7 +1092,8 @@ xlog_find_zeroed(
1092 * the first block must be 1. If it's not, maybe we're 1092 * the first block must be 1. If it's not, maybe we're
1093 * not looking at a log... Bail out. 1093 * not looking at a log... Bail out.
1094 */ 1094 */
1095 xlog_warn("XFS: Log inconsistent or not a log (last==0, first!=1)"); 1095 xfs_warn(log->l_mp,
1096 "Log inconsistent or not a log (last==0, first!=1)");
1096 return XFS_ERROR(EINVAL); 1097 return XFS_ERROR(EINVAL);
1097 } 1098 }
1098 1099
@@ -1506,8 +1507,8 @@ xlog_recover_add_to_trans(
1506 if (list_empty(&trans->r_itemq)) { 1507 if (list_empty(&trans->r_itemq)) {
1507 /* we need to catch log corruptions here */ 1508 /* we need to catch log corruptions here */
1508 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 1509 if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) {
1509 xlog_warn("XFS: xlog_recover_add_to_trans: " 1510 xfs_warn(log->l_mp, "%s: bad header magic number",
1510 "bad header magic number"); 1511 __func__);
1511 ASSERT(0); 1512 ASSERT(0);
1512 return XFS_ERROR(EIO); 1513 return XFS_ERROR(EIO);
1513 } 1514 }
@@ -1534,8 +1535,8 @@ xlog_recover_add_to_trans(
1534 if (item->ri_total == 0) { /* first region to be added */ 1535 if (item->ri_total == 0) { /* first region to be added */
1535 if (in_f->ilf_size == 0 || 1536 if (in_f->ilf_size == 0 ||
1536 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { 1537 in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) {
1537 xlog_warn( 1538 xfs_warn(log->l_mp,
1538 "XFS: bad number of regions (%d) in inode log format", 1539 "bad number of regions (%d) in inode log format",
1539 in_f->ilf_size); 1540 in_f->ilf_size);
1540 ASSERT(0); 1541 ASSERT(0);
1541 return XFS_ERROR(EIO); 1542 return XFS_ERROR(EIO);
@@ -1592,8 +1593,9 @@ xlog_recover_reorder_trans(
1592 list_move_tail(&item->ri_list, &trans->r_itemq); 1593 list_move_tail(&item->ri_list, &trans->r_itemq);
1593 break; 1594 break;
1594 default: 1595 default:
1595 xlog_warn( 1596 xfs_warn(log->l_mp,
1596 "XFS: xlog_recover_reorder_trans: unrecognized type of log operation"); 1597 "%s: unrecognized type of log operation",
1598 __func__);
1597 ASSERT(0); 1599 ASSERT(0);
1598 return XFS_ERROR(EIO); 1600 return XFS_ERROR(EIO);
1599 } 1601 }
@@ -1803,8 +1805,9 @@ xlog_recover_do_inode_buffer(
1803 logged_nextp = item->ri_buf[item_index].i_addr + 1805 logged_nextp = item->ri_buf[item_index].i_addr +
1804 next_unlinked_offset - reg_buf_offset; 1806 next_unlinked_offset - reg_buf_offset;
1805 if (unlikely(*logged_nextp == 0)) { 1807 if (unlikely(*logged_nextp == 0)) {
1806 xfs_fs_cmn_err(CE_ALERT, mp, 1808 xfs_alert(mp,
1807 "bad inode buffer log record (ptr = 0x%p, bp = 0x%p). XFS trying to replay bad (0) inode di_next_unlinked field", 1809 "Bad inode buffer log record (ptr = 0x%p, bp = 0x%p). "
1810 "Trying to replay bad (0) inode di_next_unlinked field.",
1808 item, bp); 1811 item, bp);
1809 XFS_ERROR_REPORT("xlog_recover_do_inode_buf", 1812 XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
1810 XFS_ERRLEVEL_LOW, mp); 1813 XFS_ERRLEVEL_LOW, mp);
@@ -1863,17 +1866,17 @@ xlog_recover_do_reg_buffer(
1863 if (buf_f->blf_flags & 1866 if (buf_f->blf_flags &
1864 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) { 1867 (XFS_BLF_UDQUOT_BUF|XFS_BLF_PDQUOT_BUF|XFS_BLF_GDQUOT_BUF)) {
1865 if (item->ri_buf[i].i_addr == NULL) { 1868 if (item->ri_buf[i].i_addr == NULL) {
1866 cmn_err(CE_ALERT, 1869 xfs_alert(mp,
1867 "XFS: NULL dquot in %s.", __func__); 1870 "XFS: NULL dquot in %s.", __func__);
1868 goto next; 1871 goto next;
1869 } 1872 }
1870 if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) { 1873 if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
1871 cmn_err(CE_ALERT, 1874 xfs_alert(mp,
1872 "XFS: dquot too small (%d) in %s.", 1875 "XFS: dquot too small (%d) in %s.",
1873 item->ri_buf[i].i_len, __func__); 1876 item->ri_buf[i].i_len, __func__);
1874 goto next; 1877 goto next;
1875 } 1878 }
1876 error = xfs_qm_dqcheck(item->ri_buf[i].i_addr, 1879 error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr,
1877 -1, 0, XFS_QMOPT_DOWARN, 1880 -1, 0, XFS_QMOPT_DOWARN,
1878 "dquot_buf_recover"); 1881 "dquot_buf_recover");
1879 if (error) 1882 if (error)
@@ -1898,6 +1901,7 @@ xlog_recover_do_reg_buffer(
1898 */ 1901 */
1899int 1902int
1900xfs_qm_dqcheck( 1903xfs_qm_dqcheck(
1904 struct xfs_mount *mp,
1901 xfs_disk_dquot_t *ddq, 1905 xfs_disk_dquot_t *ddq,
1902 xfs_dqid_t id, 1906 xfs_dqid_t id,
1903 uint type, /* used only when IO_dorepair is true */ 1907 uint type, /* used only when IO_dorepair is true */
@@ -1924,14 +1928,14 @@ xfs_qm_dqcheck(
1924 */ 1928 */
1925 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { 1929 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
1926 if (flags & XFS_QMOPT_DOWARN) 1930 if (flags & XFS_QMOPT_DOWARN)
1927 cmn_err(CE_ALERT, 1931 xfs_alert(mp,
1928 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 1932 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
1929 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); 1933 str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC);
1930 errs++; 1934 errs++;
1931 } 1935 }
1932 if (ddq->d_version != XFS_DQUOT_VERSION) { 1936 if (ddq->d_version != XFS_DQUOT_VERSION) {
1933 if (flags & XFS_QMOPT_DOWARN) 1937 if (flags & XFS_QMOPT_DOWARN)
1934 cmn_err(CE_ALERT, 1938 xfs_alert(mp,
1935 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", 1939 "%s : XFS dquot ID 0x%x, version 0x%x != 0x%x",
1936 str, id, ddq->d_version, XFS_DQUOT_VERSION); 1940 str, id, ddq->d_version, XFS_DQUOT_VERSION);
1937 errs++; 1941 errs++;
@@ -1941,7 +1945,7 @@ xfs_qm_dqcheck(
1941 ddq->d_flags != XFS_DQ_PROJ && 1945 ddq->d_flags != XFS_DQ_PROJ &&
1942 ddq->d_flags != XFS_DQ_GROUP) { 1946 ddq->d_flags != XFS_DQ_GROUP) {
1943 if (flags & XFS_QMOPT_DOWARN) 1947 if (flags & XFS_QMOPT_DOWARN)
1944 cmn_err(CE_ALERT, 1948 xfs_alert(mp,
1945 "%s : XFS dquot ID 0x%x, unknown flags 0x%x", 1949 "%s : XFS dquot ID 0x%x, unknown flags 0x%x",
1946 str, id, ddq->d_flags); 1950 str, id, ddq->d_flags);
1947 errs++; 1951 errs++;
@@ -1949,7 +1953,7 @@ xfs_qm_dqcheck(
1949 1953
1950 if (id != -1 && id != be32_to_cpu(ddq->d_id)) { 1954 if (id != -1 && id != be32_to_cpu(ddq->d_id)) {
1951 if (flags & XFS_QMOPT_DOWARN) 1955 if (flags & XFS_QMOPT_DOWARN)
1952 cmn_err(CE_ALERT, 1956 xfs_alert(mp,
1953 "%s : ondisk-dquot 0x%p, ID mismatch: " 1957 "%s : ondisk-dquot 0x%p, ID mismatch: "
1954 "0x%x expected, found id 0x%x", 1958 "0x%x expected, found id 0x%x",
1955 str, ddq, id, be32_to_cpu(ddq->d_id)); 1959 str, ddq, id, be32_to_cpu(ddq->d_id));
@@ -1962,9 +1966,8 @@ xfs_qm_dqcheck(
1962 be64_to_cpu(ddq->d_blk_softlimit)) { 1966 be64_to_cpu(ddq->d_blk_softlimit)) {
1963 if (!ddq->d_btimer) { 1967 if (!ddq->d_btimer) {
1964 if (flags & XFS_QMOPT_DOWARN) 1968 if (flags & XFS_QMOPT_DOWARN)
1965 cmn_err(CE_ALERT, 1969 xfs_alert(mp,
1966 "%s : Dquot ID 0x%x (0x%p) " 1970 "%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED",
1967 "BLK TIMER NOT STARTED",
1968 str, (int)be32_to_cpu(ddq->d_id), ddq); 1971 str, (int)be32_to_cpu(ddq->d_id), ddq);
1969 errs++; 1972 errs++;
1970 } 1973 }
@@ -1974,9 +1977,8 @@ xfs_qm_dqcheck(
1974 be64_to_cpu(ddq->d_ino_softlimit)) { 1977 be64_to_cpu(ddq->d_ino_softlimit)) {
1975 if (!ddq->d_itimer) { 1978 if (!ddq->d_itimer) {
1976 if (flags & XFS_QMOPT_DOWARN) 1979 if (flags & XFS_QMOPT_DOWARN)
1977 cmn_err(CE_ALERT, 1980 xfs_alert(mp,
1978 "%s : Dquot ID 0x%x (0x%p) " 1981 "%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED",
1979 "INODE TIMER NOT STARTED",
1980 str, (int)be32_to_cpu(ddq->d_id), ddq); 1982 str, (int)be32_to_cpu(ddq->d_id), ddq);
1981 errs++; 1983 errs++;
1982 } 1984 }
@@ -1986,9 +1988,8 @@ xfs_qm_dqcheck(
1986 be64_to_cpu(ddq->d_rtb_softlimit)) { 1988 be64_to_cpu(ddq->d_rtb_softlimit)) {
1987 if (!ddq->d_rtbtimer) { 1989 if (!ddq->d_rtbtimer) {
1988 if (flags & XFS_QMOPT_DOWARN) 1990 if (flags & XFS_QMOPT_DOWARN)
1989 cmn_err(CE_ALERT, 1991 xfs_alert(mp,
1990 "%s : Dquot ID 0x%x (0x%p) " 1992 "%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED",
1991 "RTBLK TIMER NOT STARTED",
1992 str, (int)be32_to_cpu(ddq->d_id), ddq); 1993 str, (int)be32_to_cpu(ddq->d_id), ddq);
1993 errs++; 1994 errs++;
1994 } 1995 }
@@ -1999,7 +2000,7 @@ xfs_qm_dqcheck(
1999 return errs; 2000 return errs;
2000 2001
2001 if (flags & XFS_QMOPT_DOWARN) 2002 if (flags & XFS_QMOPT_DOWARN)
2002 cmn_err(CE_NOTE, "Re-initializing dquot ID 0x%x", id); 2003 xfs_notice(mp, "Re-initializing dquot ID 0x%x", id);
2003 2004
2004 /* 2005 /*
2005 * Typically, a repair is only requested by quotacheck. 2006 * Typically, a repair is only requested by quotacheck.
@@ -2218,9 +2219,9 @@ xlog_recover_inode_pass2(
2218 */ 2219 */
2219 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { 2220 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
2220 xfs_buf_relse(bp); 2221 xfs_buf_relse(bp);
2221 xfs_fs_cmn_err(CE_ALERT, mp, 2222 xfs_alert(mp,
2222 "xfs_inode_recover: Bad inode magic number, dino ptr = 0x%p, dino bp = 0x%p, ino = %Ld", 2223 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
2223 dip, bp, in_f->ilf_ino); 2224 __func__, dip, bp, in_f->ilf_ino);
2224 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)", 2225 XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
2225 XFS_ERRLEVEL_LOW, mp); 2226 XFS_ERRLEVEL_LOW, mp);
2226 error = EFSCORRUPTED; 2227 error = EFSCORRUPTED;
@@ -2229,9 +2230,9 @@ xlog_recover_inode_pass2(
2229 dicp = item->ri_buf[1].i_addr; 2230 dicp = item->ri_buf[1].i_addr;
2230 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) { 2231 if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
2231 xfs_buf_relse(bp); 2232 xfs_buf_relse(bp);
2232 xfs_fs_cmn_err(CE_ALERT, mp, 2233 xfs_alert(mp,
2233 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, ino %Ld", 2234 "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
2234 item, in_f->ilf_ino); 2235 __func__, item, in_f->ilf_ino);
2235 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)", 2236 XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
2236 XFS_ERRLEVEL_LOW, mp); 2237 XFS_ERRLEVEL_LOW, mp);
2237 error = EFSCORRUPTED; 2238 error = EFSCORRUPTED;
@@ -2263,9 +2264,10 @@ xlog_recover_inode_pass2(
2263 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)", 2264 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
2264 XFS_ERRLEVEL_LOW, mp, dicp); 2265 XFS_ERRLEVEL_LOW, mp, dicp);
2265 xfs_buf_relse(bp); 2266 xfs_buf_relse(bp);
2266 xfs_fs_cmn_err(CE_ALERT, mp, 2267 xfs_alert(mp,
2267 "xfs_inode_recover: Bad regular inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2268 "%s: Bad regular inode log record, rec ptr 0x%p, "
2268 item, dip, bp, in_f->ilf_ino); 2269 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2270 __func__, item, dip, bp, in_f->ilf_ino);
2269 error = EFSCORRUPTED; 2271 error = EFSCORRUPTED;
2270 goto error; 2272 goto error;
2271 } 2273 }
@@ -2276,9 +2278,10 @@ xlog_recover_inode_pass2(
2276 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)", 2278 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
2277 XFS_ERRLEVEL_LOW, mp, dicp); 2279 XFS_ERRLEVEL_LOW, mp, dicp);
2278 xfs_buf_relse(bp); 2280 xfs_buf_relse(bp);
2279 xfs_fs_cmn_err(CE_ALERT, mp, 2281 xfs_alert(mp,
2280 "xfs_inode_recover: Bad dir inode log record, rec ptr 0x%p, ino ptr = 0x%p, ino bp = 0x%p, ino %Ld", 2282 "%s: Bad dir inode log record, rec ptr 0x%p, "
2281 item, dip, bp, in_f->ilf_ino); 2283 "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
2284 __func__, item, dip, bp, in_f->ilf_ino);
2282 error = EFSCORRUPTED; 2285 error = EFSCORRUPTED;
2283 goto error; 2286 goto error;
2284 } 2287 }
@@ -2287,9 +2290,10 @@ xlog_recover_inode_pass2(
2287 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)", 2290 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
2288 XFS_ERRLEVEL_LOW, mp, dicp); 2291 XFS_ERRLEVEL_LOW, mp, dicp);
2289 xfs_buf_relse(bp); 2292 xfs_buf_relse(bp);
2290 xfs_fs_cmn_err(CE_ALERT, mp, 2293 xfs_alert(mp,
2291 "xfs_inode_recover: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld", 2294 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
2292 item, dip, bp, in_f->ilf_ino, 2295 "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
2296 __func__, item, dip, bp, in_f->ilf_ino,
2293 dicp->di_nextents + dicp->di_anextents, 2297 dicp->di_nextents + dicp->di_anextents,
2294 dicp->di_nblocks); 2298 dicp->di_nblocks);
2295 error = EFSCORRUPTED; 2299 error = EFSCORRUPTED;
@@ -2299,8 +2303,9 @@ xlog_recover_inode_pass2(
2299 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)", 2303 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
2300 XFS_ERRLEVEL_LOW, mp, dicp); 2304 XFS_ERRLEVEL_LOW, mp, dicp);
2301 xfs_buf_relse(bp); 2305 xfs_buf_relse(bp);
2302 xfs_fs_cmn_err(CE_ALERT, mp, 2306 xfs_alert(mp,
2303 "xfs_inode_recover: Bad inode log rec ptr 0x%p, dino ptr 0x%p, dino bp 0x%p, ino %Ld, forkoff 0x%x", 2307 "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
2308 "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
2304 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff); 2309 item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
2305 error = EFSCORRUPTED; 2310 error = EFSCORRUPTED;
2306 goto error; 2311 goto error;
@@ -2309,9 +2314,9 @@ xlog_recover_inode_pass2(
2309 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)", 2314 XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
2310 XFS_ERRLEVEL_LOW, mp, dicp); 2315 XFS_ERRLEVEL_LOW, mp, dicp);
2311 xfs_buf_relse(bp); 2316 xfs_buf_relse(bp);
2312 xfs_fs_cmn_err(CE_ALERT, mp, 2317 xfs_alert(mp,
2313 "xfs_inode_recover: Bad inode log record length %d, rec ptr 0x%p", 2318 "%s: Bad inode log record length %d, rec ptr 0x%p",
2314 item->ri_buf[1].i_len, item); 2319 __func__, item->ri_buf[1].i_len, item);
2315 error = EFSCORRUPTED; 2320 error = EFSCORRUPTED;
2316 goto error; 2321 goto error;
2317 } 2322 }
@@ -2398,7 +2403,7 @@ xlog_recover_inode_pass2(
2398 break; 2403 break;
2399 2404
2400 default: 2405 default:
2401 xlog_warn("XFS: xlog_recover_inode_pass2: Invalid flag"); 2406 xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
2402 ASSERT(0); 2407 ASSERT(0);
2403 xfs_buf_relse(bp); 2408 xfs_buf_relse(bp);
2404 error = EIO; 2409 error = EIO;
@@ -2467,13 +2472,11 @@ xlog_recover_dquot_pass2(
2467 2472
2468 recddq = item->ri_buf[1].i_addr; 2473 recddq = item->ri_buf[1].i_addr;
2469 if (recddq == NULL) { 2474 if (recddq == NULL) {
2470 cmn_err(CE_ALERT, 2475 xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
2471 "XFS: NULL dquot in %s.", __func__);
2472 return XFS_ERROR(EIO); 2476 return XFS_ERROR(EIO);
2473 } 2477 }
2474 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) { 2478 if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
2475 cmn_err(CE_ALERT, 2479 xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
2476 "XFS: dquot too small (%d) in %s.",
2477 item->ri_buf[1].i_len, __func__); 2480 item->ri_buf[1].i_len, __func__);
2478 return XFS_ERROR(EIO); 2481 return XFS_ERROR(EIO);
2479 } 2482 }
@@ -2498,12 +2501,10 @@ xlog_recover_dquot_pass2(
2498 */ 2501 */
2499 dq_f = item->ri_buf[0].i_addr; 2502 dq_f = item->ri_buf[0].i_addr;
2500 ASSERT(dq_f); 2503 ASSERT(dq_f);
2501 if ((error = xfs_qm_dqcheck(recddq, 2504 error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2502 dq_f->qlf_id, 2505 "xlog_recover_dquot_pass2 (log copy)");
2503 0, XFS_QMOPT_DOWARN, 2506 if (error)
2504 "xlog_recover_dquot_pass2 (log copy)"))) {
2505 return XFS_ERROR(EIO); 2507 return XFS_ERROR(EIO);
2506 }
2507 ASSERT(dq_f->qlf_len == 1); 2508 ASSERT(dq_f->qlf_len == 1);
2508 2509
2509 error = xfs_read_buf(mp, mp->m_ddev_targp, 2510 error = xfs_read_buf(mp, mp->m_ddev_targp,
@@ -2523,8 +2524,9 @@ xlog_recover_dquot_pass2(
2523 * was among a chunk of dquots created earlier, and we did some 2524 * was among a chunk of dquots created earlier, and we did some
2524 * minimal initialization then. 2525 * minimal initialization then.
2525 */ 2526 */
2526 if (xfs_qm_dqcheck(ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, 2527 error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,
2527 "xlog_recover_dquot_pass2")) { 2528 "xlog_recover_dquot_pass2");
2529 if (error) {
2528 xfs_buf_relse(bp); 2530 xfs_buf_relse(bp);
2529 return XFS_ERROR(EIO); 2531 return XFS_ERROR(EIO);
2530 } 2532 }
@@ -2676,9 +2678,8 @@ xlog_recover_commit_pass1(
2676 /* nothing to do in pass 1 */ 2678 /* nothing to do in pass 1 */
2677 return 0; 2679 return 0;
2678 default: 2680 default:
2679 xlog_warn( 2681 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
2680 "XFS: invalid item type (%d) xlog_recover_commit_pass1", 2682 __func__, ITEM_TYPE(item));
2681 ITEM_TYPE(item));
2682 ASSERT(0); 2683 ASSERT(0);
2683 return XFS_ERROR(EIO); 2684 return XFS_ERROR(EIO);
2684 } 2685 }
@@ -2707,9 +2708,8 @@ xlog_recover_commit_pass2(
2707 /* nothing to do in pass2 */ 2708 /* nothing to do in pass2 */
2708 return 0; 2709 return 0;
2709 default: 2710 default:
2710 xlog_warn( 2711 xfs_warn(log->l_mp, "%s: invalid item type (%d)",
2711 "XFS: invalid item type (%d) xlog_recover_commit_pass2", 2712 __func__, ITEM_TYPE(item));
2712 ITEM_TYPE(item));
2713 ASSERT(0); 2713 ASSERT(0);
2714 return XFS_ERROR(EIO); 2714 return XFS_ERROR(EIO);
2715 } 2715 }
@@ -2751,10 +2751,11 @@ xlog_recover_commit_trans(
2751 2751
2752STATIC int 2752STATIC int
2753xlog_recover_unmount_trans( 2753xlog_recover_unmount_trans(
2754 struct log *log,
2754 xlog_recover_t *trans) 2755 xlog_recover_t *trans)
2755{ 2756{
2756 /* Do nothing now */ 2757 /* Do nothing now */
2757 xlog_warn("XFS: xlog_recover_unmount_trans: Unmount LR"); 2758 xfs_warn(log->l_mp, "%s: Unmount LR", __func__);
2758 return 0; 2759 return 0;
2759} 2760}
2760 2761
@@ -2797,8 +2798,8 @@ xlog_recover_process_data(
2797 dp += sizeof(xlog_op_header_t); 2798 dp += sizeof(xlog_op_header_t);
2798 if (ohead->oh_clientid != XFS_TRANSACTION && 2799 if (ohead->oh_clientid != XFS_TRANSACTION &&
2799 ohead->oh_clientid != XFS_LOG) { 2800 ohead->oh_clientid != XFS_LOG) {
2800 xlog_warn( 2801 xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
2801 "XFS: xlog_recover_process_data: bad clientid"); 2802 __func__, ohead->oh_clientid);
2802 ASSERT(0); 2803 ASSERT(0);
2803 return (XFS_ERROR(EIO)); 2804 return (XFS_ERROR(EIO));
2804 } 2805 }
@@ -2811,8 +2812,8 @@ xlog_recover_process_data(
2811 be64_to_cpu(rhead->h_lsn)); 2812 be64_to_cpu(rhead->h_lsn));
2812 } else { 2813 } else {
2813 if (dp + be32_to_cpu(ohead->oh_len) > lp) { 2814 if (dp + be32_to_cpu(ohead->oh_len) > lp) {
2814 xlog_warn( 2815 xfs_warn(log->l_mp, "%s: bad length 0x%x",
2815 "XFS: xlog_recover_process_data: bad length"); 2816 __func__, be32_to_cpu(ohead->oh_len));
2816 WARN_ON(1); 2817 WARN_ON(1);
2817 return (XFS_ERROR(EIO)); 2818 return (XFS_ERROR(EIO));
2818 } 2819 }
@@ -2825,7 +2826,7 @@ xlog_recover_process_data(
2825 trans, pass); 2826 trans, pass);
2826 break; 2827 break;
2827 case XLOG_UNMOUNT_TRANS: 2828 case XLOG_UNMOUNT_TRANS:
2828 error = xlog_recover_unmount_trans(trans); 2829 error = xlog_recover_unmount_trans(log, trans);
2829 break; 2830 break;
2830 case XLOG_WAS_CONT_TRANS: 2831 case XLOG_WAS_CONT_TRANS:
2831 error = xlog_recover_add_to_cont_trans(log, 2832 error = xlog_recover_add_to_cont_trans(log,
@@ -2833,8 +2834,8 @@ xlog_recover_process_data(
2833 be32_to_cpu(ohead->oh_len)); 2834 be32_to_cpu(ohead->oh_len));
2834 break; 2835 break;
2835 case XLOG_START_TRANS: 2836 case XLOG_START_TRANS:
2836 xlog_warn( 2837 xfs_warn(log->l_mp, "%s: bad transaction",
2837 "XFS: xlog_recover_process_data: bad transaction"); 2838 __func__);
2838 ASSERT(0); 2839 ASSERT(0);
2839 error = XFS_ERROR(EIO); 2840 error = XFS_ERROR(EIO);
2840 break; 2841 break;
@@ -2844,8 +2845,8 @@ xlog_recover_process_data(
2844 dp, be32_to_cpu(ohead->oh_len)); 2845 dp, be32_to_cpu(ohead->oh_len));
2845 break; 2846 break;
2846 default: 2847 default:
2847 xlog_warn( 2848 xfs_warn(log->l_mp, "%s: bad flag 0x%x",
2848 "XFS: xlog_recover_process_data: bad flag"); 2849 __func__, flags);
2849 ASSERT(0); 2850 ASSERT(0);
2850 error = XFS_ERROR(EIO); 2851 error = XFS_ERROR(EIO);
2851 break; 2852 break;
@@ -3030,8 +3031,7 @@ xlog_recover_clear_agi_bucket(
3030out_abort: 3031out_abort:
3031 xfs_trans_cancel(tp, XFS_TRANS_ABORT); 3032 xfs_trans_cancel(tp, XFS_TRANS_ABORT);
3032out_error: 3033out_error:
3033 xfs_fs_cmn_err(CE_WARN, mp, "xlog_recover_clear_agi_bucket: " 3034 xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
3034 "failed to clear agi %d. Continuing.", agno);
3035 return; 3035 return;
3036} 3036}
3037 3037
@@ -3282,7 +3282,7 @@ xlog_valid_rec_header(
3282 if (unlikely( 3282 if (unlikely(
3283 (!rhead->h_version || 3283 (!rhead->h_version ||
3284 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { 3284 (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
3285 xlog_warn("XFS: %s: unrecognised log version (%d).", 3285 xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
3286 __func__, be32_to_cpu(rhead->h_version)); 3286 __func__, be32_to_cpu(rhead->h_version));
3287 return XFS_ERROR(EIO); 3287 return XFS_ERROR(EIO);
3288 } 3288 }
@@ -3740,10 +3740,9 @@ xlog_recover(
3740 return error; 3740 return error;
3741 } 3741 }
3742 3742
3743 cmn_err(CE_NOTE, 3743 xfs_notice(log->l_mp, "Starting recovery (logdev: %s)",
3744 "Starting XFS recovery on filesystem: %s (logdev: %s)", 3744 log->l_mp->m_logname ? log->l_mp->m_logname
3745 log->l_mp->m_fsname, log->l_mp->m_logname ? 3745 : "internal");
3746 log->l_mp->m_logname : "internal");
3747 3746
3748 error = xlog_do_recover(log, head_blk, tail_blk); 3747 error = xlog_do_recover(log, head_blk, tail_blk);
3749 log->l_flags |= XLOG_RECOVERY_NEEDED; 3748 log->l_flags |= XLOG_RECOVERY_NEEDED;
@@ -3776,9 +3775,7 @@ xlog_recover_finish(
3776 int error; 3775 int error;
3777 error = xlog_recover_process_efis(log); 3776 error = xlog_recover_process_efis(log);
3778 if (error) { 3777 if (error) {
3779 cmn_err(CE_ALERT, 3778 xfs_alert(log->l_mp, "Failed to recover EFIs");
3780 "Failed to recover EFIs on filesystem: %s",
3781 log->l_mp->m_fsname);
3782 return error; 3779 return error;
3783 } 3780 }
3784 /* 3781 /*
@@ -3793,15 +3790,12 @@ xlog_recover_finish(
3793 3790
3794 xlog_recover_check_summary(log); 3791 xlog_recover_check_summary(log);
3795 3792
3796 cmn_err(CE_NOTE, 3793 xfs_notice(log->l_mp, "Ending recovery (logdev: %s)",
3797 "Ending XFS recovery on filesystem: %s (logdev: %s)", 3794 log->l_mp->m_logname ? log->l_mp->m_logname
3798 log->l_mp->m_fsname, log->l_mp->m_logname ? 3795 : "internal");
3799 log->l_mp->m_logname : "internal");
3800 log->l_flags &= ~XLOG_RECOVERY_NEEDED; 3796 log->l_flags &= ~XLOG_RECOVERY_NEEDED;
3801 } else { 3797 } else {
3802 cmn_err(CE_DEBUG, 3798 xfs_info(log->l_mp, "Ending clean mount");
3803 "Ending clean XFS mount for filesystem: %s\n",
3804 log->l_mp->m_fsname);
3805 } 3799 }
3806 return 0; 3800 return 0;
3807} 3801}
@@ -3834,10 +3828,8 @@ xlog_recover_check_summary(
3834 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { 3828 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
3835 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp); 3829 error = xfs_read_agf(mp, NULL, agno, 0, &agfbp);
3836 if (error) { 3830 if (error) {
3837 xfs_fs_cmn_err(CE_ALERT, mp, 3831 xfs_alert(mp, "%s agf read failed agno %d error %d",
3838 "xlog_recover_check_summary(agf)" 3832 __func__, agno, error);
3839 "agf read failed agno %d error %d",
3840 agno, error);
3841 } else { 3833 } else {
3842 agfp = XFS_BUF_TO_AGF(agfbp); 3834 agfp = XFS_BUF_TO_AGF(agfbp);
3843 freeblks += be32_to_cpu(agfp->agf_freeblks) + 3835 freeblks += be32_to_cpu(agfp->agf_freeblks) +
@@ -3846,7 +3838,10 @@ xlog_recover_check_summary(
3846 } 3838 }
3847 3839
3848 error = xfs_read_agi(mp, NULL, agno, &agibp); 3840 error = xfs_read_agi(mp, NULL, agno, &agibp);
3849 if (!error) { 3841 if (error) {
3842 xfs_alert(mp, "%s agi read failed agno %d error %d",
3843 __func__, agno, error);
3844 } else {
3850 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp); 3845 struct xfs_agi *agi = XFS_BUF_TO_AGI(agibp);
3851 3846
3852 itotal += be32_to_cpu(agi->agi_count); 3847 itotal += be32_to_cpu(agi->agi_count);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index d447aef84bc3..bb3f9a7b24ed 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -133,9 +133,7 @@ xfs_uuid_mount(
133 return 0; 133 return 0;
134 134
135 if (uuid_is_nil(uuid)) { 135 if (uuid_is_nil(uuid)) {
136 cmn_err(CE_WARN, 136 xfs_warn(mp, "Filesystem has nil UUID - can't mount");
137 "XFS: Filesystem %s has nil UUID - can't mount",
138 mp->m_fsname);
139 return XFS_ERROR(EINVAL); 137 return XFS_ERROR(EINVAL);
140 } 138 }
141 139
@@ -163,8 +161,7 @@ xfs_uuid_mount(
163 161
164 out_duplicate: 162 out_duplicate:
165 mutex_unlock(&xfs_uuid_table_mutex); 163 mutex_unlock(&xfs_uuid_table_mutex);
166 cmn_err(CE_WARN, "XFS: Filesystem %s has duplicate UUID - can't mount", 164 xfs_warn(mp, "Filesystem has duplicate UUID - can't mount");
167 mp->m_fsname);
168 return XFS_ERROR(EINVAL); 165 return XFS_ERROR(EINVAL);
169} 166}
170 167
@@ -311,6 +308,8 @@ xfs_mount_validate_sb(
311 xfs_sb_t *sbp, 308 xfs_sb_t *sbp,
312 int flags) 309 int flags)
313{ 310{
311 int loud = !(flags & XFS_MFSI_QUIET);
312
314 /* 313 /*
315 * If the log device and data device have the 314 * If the log device and data device have the
316 * same device number, the log is internal. 315 * same device number, the log is internal.
@@ -319,28 +318,32 @@ xfs_mount_validate_sb(
319 * a volume filesystem in a non-volume manner. 318 * a volume filesystem in a non-volume manner.
320 */ 319 */
321 if (sbp->sb_magicnum != XFS_SB_MAGIC) { 320 if (sbp->sb_magicnum != XFS_SB_MAGIC) {
322 xfs_fs_mount_cmn_err(flags, "bad magic number"); 321 if (loud)
322 xfs_warn(mp, "bad magic number");
323 return XFS_ERROR(EWRONGFS); 323 return XFS_ERROR(EWRONGFS);
324 } 324 }
325 325
326 if (!xfs_sb_good_version(sbp)) { 326 if (!xfs_sb_good_version(sbp)) {
327 xfs_fs_mount_cmn_err(flags, "bad version"); 327 if (loud)
328 xfs_warn(mp, "bad version");
328 return XFS_ERROR(EWRONGFS); 329 return XFS_ERROR(EWRONGFS);
329 } 330 }
330 331
331 if (unlikely( 332 if (unlikely(
332 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { 333 sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) {
333 xfs_fs_mount_cmn_err(flags, 334 if (loud)
334 "filesystem is marked as having an external log; " 335 xfs_warn(mp,
335 "specify logdev on the\nmount command line."); 336 "filesystem is marked as having an external log; "
337 "specify logdev on the mount command line.");
336 return XFS_ERROR(EINVAL); 338 return XFS_ERROR(EINVAL);
337 } 339 }
338 340
339 if (unlikely( 341 if (unlikely(
340 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { 342 sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) {
341 xfs_fs_mount_cmn_err(flags, 343 if (loud)
342 "filesystem is marked as having an internal log; " 344 xfs_warn(mp,
343 "do not specify logdev on\nthe mount command line."); 345 "filesystem is marked as having an internal log; "
346 "do not specify logdev on the mount command line.");
344 return XFS_ERROR(EINVAL); 347 return XFS_ERROR(EINVAL);
345 } 348 }
346 349
@@ -369,7 +372,8 @@ xfs_mount_validate_sb(
369 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 372 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
370 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 373 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
371 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { 374 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
372 xfs_fs_mount_cmn_err(flags, "SB sanity check 1 failed"); 375 if (loud)
376 xfs_warn(mp, "SB sanity check 1 failed");
373 return XFS_ERROR(EFSCORRUPTED); 377 return XFS_ERROR(EFSCORRUPTED);
374 } 378 }
375 379
@@ -382,7 +386,8 @@ xfs_mount_validate_sb(
382 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks || 386 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
383 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) * 387 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
384 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) { 388 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
385 xfs_fs_mount_cmn_err(flags, "SB sanity check 2 failed"); 389 if (loud)
390 xfs_warn(mp, "SB sanity check 2 failed");
386 return XFS_ERROR(EFSCORRUPTED); 391 return XFS_ERROR(EFSCORRUPTED);
387 } 392 }
388 393
@@ -390,12 +395,12 @@ xfs_mount_validate_sb(
390 * Until this is fixed only page-sized or smaller data blocks work. 395 * Until this is fixed only page-sized or smaller data blocks work.
391 */ 396 */
392 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { 397 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
393 xfs_fs_mount_cmn_err(flags, 398 if (loud) {
394 "file system with blocksize %d bytes", 399 xfs_warn(mp,
395 sbp->sb_blocksize); 400 "File system with blocksize %d bytes. "
396 xfs_fs_mount_cmn_err(flags, 401 "Only pagesize (%ld) or less will currently work.",
397 "only pagesize (%ld) or less will currently work.", 402 sbp->sb_blocksize, PAGE_SIZE);
398 PAGE_SIZE); 403 }
399 return XFS_ERROR(ENOSYS); 404 return XFS_ERROR(ENOSYS);
400 } 405 }
401 406
@@ -409,21 +414,23 @@ xfs_mount_validate_sb(
409 case 2048: 414 case 2048:
410 break; 415 break;
411 default: 416 default:
412 xfs_fs_mount_cmn_err(flags, 417 if (loud)
413 "inode size of %d bytes not supported", 418 xfs_warn(mp, "inode size of %d bytes not supported",
414 sbp->sb_inodesize); 419 sbp->sb_inodesize);
415 return XFS_ERROR(ENOSYS); 420 return XFS_ERROR(ENOSYS);
416 } 421 }
417 422
418 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 423 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
419 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 424 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
420 xfs_fs_mount_cmn_err(flags, 425 if (loud)
421 "file system too large to be mounted on this system."); 426 xfs_warn(mp,
427 "file system too large to be mounted on this system.");
422 return XFS_ERROR(EFBIG); 428 return XFS_ERROR(EFBIG);
423 } 429 }
424 430
425 if (unlikely(sbp->sb_inprogress)) { 431 if (unlikely(sbp->sb_inprogress)) {
426 xfs_fs_mount_cmn_err(flags, "file system busy"); 432 if (loud)
433 xfs_warn(mp, "file system busy");
427 return XFS_ERROR(EFSCORRUPTED); 434 return XFS_ERROR(EFSCORRUPTED);
428 } 435 }
429 436
@@ -431,8 +438,9 @@ xfs_mount_validate_sb(
431 * Version 1 directory format has never worked on Linux. 438 * Version 1 directory format has never worked on Linux.
432 */ 439 */
433 if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { 440 if (unlikely(!xfs_sb_version_hasdirv2(sbp))) {
434 xfs_fs_mount_cmn_err(flags, 441 if (loud)
435 "file system using version 1 directory format"); 442 xfs_warn(mp,
443 "file system using version 1 directory format");
436 return XFS_ERROR(ENOSYS); 444 return XFS_ERROR(ENOSYS);
437 } 445 }
438 446
@@ -673,6 +681,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
673 unsigned int sector_size; 681 unsigned int sector_size;
674 xfs_buf_t *bp; 682 xfs_buf_t *bp;
675 int error; 683 int error;
684 int loud = !(flags & XFS_MFSI_QUIET);
676 685
677 ASSERT(mp->m_sb_bp == NULL); 686 ASSERT(mp->m_sb_bp == NULL);
678 ASSERT(mp->m_ddev_targp != NULL); 687 ASSERT(mp->m_ddev_targp != NULL);
@@ -688,7 +697,8 @@ reread:
688 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, 697 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
689 XFS_SB_DADDR, sector_size, 0); 698 XFS_SB_DADDR, sector_size, 0);
690 if (!bp) { 699 if (!bp) {
691 xfs_fs_mount_cmn_err(flags, "SB buffer read failed"); 700 if (loud)
701 xfs_warn(mp, "SB buffer read failed");
692 return EIO; 702 return EIO;
693 } 703 }
694 704
@@ -699,7 +709,8 @@ reread:
699 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); 709 xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
700 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); 710 error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
701 if (error) { 711 if (error) {
702 xfs_fs_mount_cmn_err(flags, "SB validate failed"); 712 if (loud)
713 xfs_warn(mp, "SB validate failed");
703 goto release_buf; 714 goto release_buf;
704 } 715 }
705 716
@@ -707,9 +718,9 @@ reread:
707 * We must be able to do sector-sized and sector-aligned IO. 718 * We must be able to do sector-sized and sector-aligned IO.
708 */ 719 */
709 if (sector_size > mp->m_sb.sb_sectsize) { 720 if (sector_size > mp->m_sb.sb_sectsize) {
710 xfs_fs_mount_cmn_err(flags, 721 if (loud)
711 "device supports only %u byte sectors (not %u)", 722 xfs_warn(mp, "device supports %u byte sectors (not %u)",
712 sector_size, mp->m_sb.sb_sectsize); 723 sector_size, mp->m_sb.sb_sectsize);
713 error = ENOSYS; 724 error = ENOSYS;
714 goto release_buf; 725 goto release_buf;
715 } 726 }
@@ -853,8 +864,7 @@ xfs_update_alignment(xfs_mount_t *mp)
853 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 864 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
854 (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 865 (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
855 if (mp->m_flags & XFS_MOUNT_RETERR) { 866 if (mp->m_flags & XFS_MOUNT_RETERR) {
856 cmn_err(CE_WARN, 867 xfs_warn(mp, "alignment check 1 failed");
857 "XFS: alignment check 1 failed");
858 return XFS_ERROR(EINVAL); 868 return XFS_ERROR(EINVAL);
859 } 869 }
860 mp->m_dalign = mp->m_swidth = 0; 870 mp->m_dalign = mp->m_swidth = 0;
@@ -867,8 +877,9 @@ xfs_update_alignment(xfs_mount_t *mp)
867 if (mp->m_flags & XFS_MOUNT_RETERR) { 877 if (mp->m_flags & XFS_MOUNT_RETERR) {
868 return XFS_ERROR(EINVAL); 878 return XFS_ERROR(EINVAL);
869 } 879 }
870 xfs_fs_cmn_err(CE_WARN, mp, 880 xfs_warn(mp,
871"stripe alignment turned off: sunit(%d)/swidth(%d) incompatible with agsize(%d)", 881 "stripe alignment turned off: sunit(%d)/swidth(%d) "
882 "incompatible with agsize(%d)",
872 mp->m_dalign, mp->m_swidth, 883 mp->m_dalign, mp->m_swidth,
873 sbp->sb_agblocks); 884 sbp->sb_agblocks);
874 885
@@ -878,9 +889,9 @@ xfs_update_alignment(xfs_mount_t *mp)
878 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 889 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
879 } else { 890 } else {
880 if (mp->m_flags & XFS_MOUNT_RETERR) { 891 if (mp->m_flags & XFS_MOUNT_RETERR) {
881 xfs_fs_cmn_err(CE_WARN, mp, 892 xfs_warn(mp,
882"stripe alignment turned off: sunit(%d) less than bsize(%d)", 893 "stripe alignment turned off: sunit(%d) less than bsize(%d)",
883 mp->m_dalign, 894 mp->m_dalign,
884 mp->m_blockmask +1); 895 mp->m_blockmask +1);
885 return XFS_ERROR(EINVAL); 896 return XFS_ERROR(EINVAL);
886 } 897 }
@@ -1026,14 +1037,14 @@ xfs_check_sizes(xfs_mount_t *mp)
1026 1037
1027 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); 1038 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
1028 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { 1039 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
1029 cmn_err(CE_WARN, "XFS: filesystem size mismatch detected"); 1040 xfs_warn(mp, "filesystem size mismatch detected");
1030 return XFS_ERROR(EFBIG); 1041 return XFS_ERROR(EFBIG);
1031 } 1042 }
1032 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, 1043 bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
1033 d - XFS_FSS_TO_BB(mp, 1), 1044 d - XFS_FSS_TO_BB(mp, 1),
1034 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); 1045 BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
1035 if (!bp) { 1046 if (!bp) {
1036 cmn_err(CE_WARN, "XFS: last sector read failed"); 1047 xfs_warn(mp, "last sector read failed");
1037 return EIO; 1048 return EIO;
1038 } 1049 }
1039 xfs_buf_relse(bp); 1050 xfs_buf_relse(bp);
@@ -1041,14 +1052,14 @@ xfs_check_sizes(xfs_mount_t *mp)
1041 if (mp->m_logdev_targp != mp->m_ddev_targp) { 1052 if (mp->m_logdev_targp != mp->m_ddev_targp) {
1042 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); 1053 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
1043 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { 1054 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
1044 cmn_err(CE_WARN, "XFS: log size mismatch detected"); 1055 xfs_warn(mp, "log size mismatch detected");
1045 return XFS_ERROR(EFBIG); 1056 return XFS_ERROR(EFBIG);
1046 } 1057 }
1047 bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, 1058 bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
1048 d - XFS_FSB_TO_BB(mp, 1), 1059 d - XFS_FSB_TO_BB(mp, 1),
1049 XFS_FSB_TO_B(mp, 1), 0); 1060 XFS_FSB_TO_B(mp, 1), 0);
1050 if (!bp) { 1061 if (!bp) {
1051 cmn_err(CE_WARN, "XFS: log device read failed"); 1062 xfs_warn(mp, "log device read failed");
1052 return EIO; 1063 return EIO;
1053 } 1064 }
1054 xfs_buf_relse(bp); 1065 xfs_buf_relse(bp);
@@ -1086,7 +1097,7 @@ xfs_mount_reset_sbqflags(
1086 return 0; 1097 return 0;
1087 1098
1088#ifdef QUOTADEBUG 1099#ifdef QUOTADEBUG
1089 xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); 1100 xfs_notice(mp, "Writing superblock quota changes");
1090#endif 1101#endif
1091 1102
1092 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1103 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
@@ -1094,8 +1105,7 @@ xfs_mount_reset_sbqflags(
1094 XFS_DEFAULT_LOG_COUNT); 1105 XFS_DEFAULT_LOG_COUNT);
1095 if (error) { 1106 if (error) {
1096 xfs_trans_cancel(tp, 0); 1107 xfs_trans_cancel(tp, 0);
1097 xfs_fs_cmn_err(CE_ALERT, mp, 1108 xfs_alert(mp, "%s: Superblock update failed!", __func__);
1098 "xfs_mount_reset_sbqflags: Superblock update failed!");
1099 return error; 1109 return error;
1100 } 1110 }
1101 1111
@@ -1161,8 +1171,7 @@ xfs_mountfs(
1161 * transaction subsystem is online. 1171 * transaction subsystem is online.
1162 */ 1172 */
1163 if (xfs_sb_has_mismatched_features2(sbp)) { 1173 if (xfs_sb_has_mismatched_features2(sbp)) {
1164 cmn_err(CE_WARN, 1174 xfs_warn(mp, "correcting sb_features alignment problem");
1165 "XFS: correcting sb_features alignment problem");
1166 sbp->sb_features2 |= sbp->sb_bad_features2; 1175 sbp->sb_features2 |= sbp->sb_bad_features2;
1167 sbp->sb_bad_features2 = sbp->sb_features2; 1176 sbp->sb_bad_features2 = sbp->sb_features2;
1168 mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2; 1177 mp->m_update_flags |= XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2;
@@ -1241,7 +1250,7 @@ xfs_mountfs(
1241 */ 1250 */
1242 error = xfs_rtmount_init(mp); 1251 error = xfs_rtmount_init(mp);
1243 if (error) { 1252 if (error) {
1244 cmn_err(CE_WARN, "XFS: RT mount failed"); 1253 xfs_warn(mp, "RT mount failed");
1245 goto out_remove_uuid; 1254 goto out_remove_uuid;
1246 } 1255 }
1247 1256
@@ -1272,12 +1281,12 @@ xfs_mountfs(
1272 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); 1281 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
1273 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); 1282 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1274 if (error) { 1283 if (error) {
1275 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error); 1284 xfs_warn(mp, "Failed per-ag init: %d", error);
1276 goto out_remove_uuid; 1285 goto out_remove_uuid;
1277 } 1286 }
1278 1287
1279 if (!sbp->sb_logblocks) { 1288 if (!sbp->sb_logblocks) {
1280 cmn_err(CE_WARN, "XFS: no log defined"); 1289 xfs_warn(mp, "no log defined");
1281 XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp); 1290 XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
1282 error = XFS_ERROR(EFSCORRUPTED); 1291 error = XFS_ERROR(EFSCORRUPTED);
1283 goto out_free_perag; 1292 goto out_free_perag;
@@ -1290,7 +1299,7 @@ xfs_mountfs(
1290 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart), 1299 XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
1291 XFS_FSB_TO_BB(mp, sbp->sb_logblocks)); 1300 XFS_FSB_TO_BB(mp, sbp->sb_logblocks));
1292 if (error) { 1301 if (error) {
1293 cmn_err(CE_WARN, "XFS: log mount failed"); 1302 xfs_warn(mp, "log mount failed");
1294 goto out_free_perag; 1303 goto out_free_perag;
1295 } 1304 }
1296 1305
@@ -1327,16 +1336,14 @@ xfs_mountfs(
1327 */ 1336 */
1328 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip); 1337 error = xfs_iget(mp, NULL, sbp->sb_rootino, 0, XFS_ILOCK_EXCL, &rip);
1329 if (error) { 1338 if (error) {
1330 cmn_err(CE_WARN, "XFS: failed to read root inode"); 1339 xfs_warn(mp, "failed to read root inode");
1331 goto out_log_dealloc; 1340 goto out_log_dealloc;
1332 } 1341 }
1333 1342
1334 ASSERT(rip != NULL); 1343 ASSERT(rip != NULL);
1335 1344
1336 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) { 1345 if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
1337 cmn_err(CE_WARN, "XFS: corrupted root inode"); 1346 xfs_warn(mp, "corrupted root inode %llu: not a directory",
1338 cmn_err(CE_WARN, "Device %s - root %llu is not a directory",
1339 XFS_BUFTARG_NAME(mp->m_ddev_targp),
1340 (unsigned long long)rip->i_ino); 1347 (unsigned long long)rip->i_ino);
1341 xfs_iunlock(rip, XFS_ILOCK_EXCL); 1348 xfs_iunlock(rip, XFS_ILOCK_EXCL);
1342 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW, 1349 XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
@@ -1356,7 +1363,7 @@ xfs_mountfs(
1356 /* 1363 /*
1357 * Free up the root inode. 1364 * Free up the root inode.
1358 */ 1365 */
1359 cmn_err(CE_WARN, "XFS: failed to read RT inodes"); 1366 xfs_warn(mp, "failed to read RT inodes");
1360 goto out_rele_rip; 1367 goto out_rele_rip;
1361 } 1368 }
1362 1369
@@ -1368,7 +1375,7 @@ xfs_mountfs(
1368 if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) { 1375 if (mp->m_update_flags && !(mp->m_flags & XFS_MOUNT_RDONLY)) {
1369 error = xfs_mount_log_sb(mp, mp->m_update_flags); 1376 error = xfs_mount_log_sb(mp, mp->m_update_flags);
1370 if (error) { 1377 if (error) {
1371 cmn_err(CE_WARN, "XFS: failed to write sb changes"); 1378 xfs_warn(mp, "failed to write sb changes");
1372 goto out_rtunmount; 1379 goto out_rtunmount;
1373 } 1380 }
1374 } 1381 }
@@ -1389,10 +1396,7 @@ xfs_mountfs(
1389 * quotachecked license. 1396 * quotachecked license.
1390 */ 1397 */
1391 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) { 1398 if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
1392 cmn_err(CE_NOTE, 1399 xfs_notice(mp, "resetting quota flags");
1393 "XFS: resetting qflags for filesystem %s",
1394 mp->m_fsname);
1395
1396 error = xfs_mount_reset_sbqflags(mp); 1400 error = xfs_mount_reset_sbqflags(mp);
1397 if (error) 1401 if (error)
1398 return error; 1402 return error;
@@ -1406,7 +1410,7 @@ xfs_mountfs(
1406 */ 1410 */
1407 error = xfs_log_mount_finish(mp); 1411 error = xfs_log_mount_finish(mp);
1408 if (error) { 1412 if (error) {
1409 cmn_err(CE_WARN, "XFS: log mount finish failed"); 1413 xfs_warn(mp, "log mount finish failed");
1410 goto out_rtunmount; 1414 goto out_rtunmount;
1411 } 1415 }
1412 1416
@@ -1435,8 +1439,8 @@ xfs_mountfs(
1435 resblks = xfs_default_resblks(mp); 1439 resblks = xfs_default_resblks(mp);
1436 error = xfs_reserve_blocks(mp, &resblks, NULL); 1440 error = xfs_reserve_blocks(mp, &resblks, NULL);
1437 if (error) 1441 if (error)
1438 cmn_err(CE_WARN, "XFS: Unable to allocate reserve " 1442 xfs_warn(mp,
1439 "blocks. Continuing without a reserve pool."); 1443 "Unable to allocate reserve blocks. Continuing without reserve pool.");
1440 } 1444 }
1441 1445
1442 return 0; 1446 return 0;
@@ -1525,12 +1529,12 @@ xfs_unmountfs(
1525 resblks = 0; 1529 resblks = 0;
1526 error = xfs_reserve_blocks(mp, &resblks, NULL); 1530 error = xfs_reserve_blocks(mp, &resblks, NULL);
1527 if (error) 1531 if (error)
1528 cmn_err(CE_WARN, "XFS: Unable to free reserved block pool. " 1532 xfs_warn(mp, "Unable to free reserved block pool. "
1529 "Freespace may not be correct on next mount."); 1533 "Freespace may not be correct on next mount.");
1530 1534
1531 error = xfs_log_sbcount(mp, 1); 1535 error = xfs_log_sbcount(mp, 1);
1532 if (error) 1536 if (error)
1533 cmn_err(CE_WARN, "XFS: Unable to update superblock counters. " 1537 xfs_warn(mp, "Unable to update superblock counters. "
1534 "Freespace may not be correct on next mount."); 1538 "Freespace may not be correct on next mount.");
1535 xfs_unmountfs_writesb(mp); 1539 xfs_unmountfs_writesb(mp);
1536 xfs_unmountfs_wait(mp); /* wait for async bufs */ 1540 xfs_unmountfs_wait(mp); /* wait for async bufs */
@@ -2013,10 +2017,8 @@ xfs_dev_is_read_only(
2013 if (xfs_readonly_buftarg(mp->m_ddev_targp) || 2017 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
2014 xfs_readonly_buftarg(mp->m_logdev_targp) || 2018 xfs_readonly_buftarg(mp->m_logdev_targp) ||
2015 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) { 2019 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
2016 cmn_err(CE_NOTE, 2020 xfs_notice(mp, "%s required on read-only device.", message);
2017 "XFS: %s required on read-only device.", message); 2021 xfs_notice(mp, "write access unavailable, cannot proceed.");
2018 cmn_err(CE_NOTE,
2019 "XFS: write access unavailable, cannot proceed.");
2020 return EROFS; 2022 return EROFS;
2021 } 2023 }
2022 return 0; 2024 return 0;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index edfa178bafb6..4aff56395732 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void)
309 if (!xfs_mru_elem_zone) 309 if (!xfs_mru_elem_zone)
310 goto out; 310 goto out;
311 311
312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); 312 xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);
313 if (!xfs_mru_reap_wq) 313 if (!xfs_mru_reap_wq)
314 goto out_destroy_mru_elem_zone; 314 goto out_destroy_mru_elem_zone;
315 315
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 9bb6eda4cd21..a595f29567fe 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -382,7 +382,8 @@ static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
382 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ 382 xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
383 f | XFS_QMOPT_RES_REGBLKS) 383 f | XFS_QMOPT_RES_REGBLKS)
384 384
385extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *); 385extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *,
386 xfs_dqid_t, uint, uint, char *);
386extern int xfs_mount_reset_sbqflags(struct xfs_mount *); 387extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
387 388
388#endif /* __KERNEL__ */ 389#endif /* __KERNEL__ */
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 12a191385310..8f76fdff4f46 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -76,7 +76,7 @@ xfs_growfs_rt_alloc(
76 xfs_mount_t *mp, /* file system mount point */ 76 xfs_mount_t *mp, /* file system mount point */
77 xfs_extlen_t oblocks, /* old count of blocks */ 77 xfs_extlen_t oblocks, /* old count of blocks */
78 xfs_extlen_t nblocks, /* new count of blocks */ 78 xfs_extlen_t nblocks, /* new count of blocks */
79 xfs_ino_t ino) /* inode number (bitmap/summary) */ 79 xfs_inode_t *ip) /* inode (bitmap/summary) */
80{ 80{
81 xfs_fileoff_t bno; /* block number in file */ 81 xfs_fileoff_t bno; /* block number in file */
82 xfs_buf_t *bp; /* temporary buffer for zeroing */ 82 xfs_buf_t *bp; /* temporary buffer for zeroing */
@@ -86,7 +86,6 @@ xfs_growfs_rt_alloc(
86 xfs_fsblock_t firstblock; /* first block allocated in xaction */ 86 xfs_fsblock_t firstblock; /* first block allocated in xaction */
87 xfs_bmap_free_t flist; /* list of freed blocks */ 87 xfs_bmap_free_t flist; /* list of freed blocks */
88 xfs_fsblock_t fsbno; /* filesystem block for bno */ 88 xfs_fsblock_t fsbno; /* filesystem block for bno */
89 xfs_inode_t *ip; /* pointer to incore inode */
90 xfs_bmbt_irec_t map; /* block map output */ 89 xfs_bmbt_irec_t map; /* block map output */
91 int nmap; /* number of block maps */ 90 int nmap; /* number of block maps */
92 int resblks; /* space reservation */ 91 int resblks; /* space reservation */
@@ -112,9 +111,9 @@ xfs_growfs_rt_alloc(
112 /* 111 /*
113 * Lock the inode. 112 * Lock the inode.
114 */ 113 */
115 if ((error = xfs_trans_iget(mp, tp, ino, 0, 114 xfs_ilock(ip, XFS_ILOCK_EXCL);
116 XFS_ILOCK_EXCL, &ip))) 115 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
117 goto error_cancel; 116
118 xfs_bmap_init(&flist, &firstblock); 117 xfs_bmap_init(&flist, &firstblock);
119 /* 118 /*
120 * Allocate blocks to the bitmap file. 119 * Allocate blocks to the bitmap file.
@@ -155,9 +154,8 @@ xfs_growfs_rt_alloc(
155 /* 154 /*
156 * Lock the bitmap inode. 155 * Lock the bitmap inode.
157 */ 156 */
158 if ((error = xfs_trans_iget(mp, tp, ino, 0, 157 xfs_ilock(ip, XFS_ILOCK_EXCL);
159 XFS_ILOCK_EXCL, &ip))) 158 xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
160 goto error_cancel;
161 /* 159 /*
162 * Get a buffer for the block. 160 * Get a buffer for the block.
163 */ 161 */
@@ -1854,7 +1852,6 @@ xfs_growfs_rt(
1854 xfs_rtblock_t bmbno; /* bitmap block number */ 1852 xfs_rtblock_t bmbno; /* bitmap block number */
1855 xfs_buf_t *bp; /* temporary buffer */ 1853 xfs_buf_t *bp; /* temporary buffer */
1856 int error; /* error return value */ 1854 int error; /* error return value */
1857 xfs_inode_t *ip; /* bitmap inode, used as lock */
1858 xfs_mount_t *nmp; /* new (fake) mount structure */ 1855 xfs_mount_t *nmp; /* new (fake) mount structure */
1859 xfs_drfsbno_t nrblocks; /* new number of realtime blocks */ 1856 xfs_drfsbno_t nrblocks; /* new number of realtime blocks */
1860 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */ 1857 xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
@@ -1918,11 +1915,11 @@ xfs_growfs_rt(
1918 /* 1915 /*
1919 * Allocate space to the bitmap and summary files, as necessary. 1916 * Allocate space to the bitmap and summary files, as necessary.
1920 */ 1917 */
1921 if ((error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, 1918 error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip);
1922 mp->m_sb.sb_rbmino))) 1919 if (error)
1923 return error; 1920 return error;
1924 if ((error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, 1921 error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip);
1925 mp->m_sb.sb_rsumino))) 1922 if (error)
1926 return error; 1923 return error;
1927 /* 1924 /*
1928 * Allocate a new (fake) mount/sb. 1925 * Allocate a new (fake) mount/sb.
@@ -1972,10 +1969,8 @@ xfs_growfs_rt(
1972 /* 1969 /*
1973 * Lock out other callers by grabbing the bitmap inode lock. 1970 * Lock out other callers by grabbing the bitmap inode lock.
1974 */ 1971 */
1975 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 1972 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
1976 XFS_ILOCK_EXCL, &ip))) 1973 xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
1977 goto error_cancel;
1978 ASSERT(ip == mp->m_rbmip);
1979 /* 1974 /*
1980 * Update the bitmap inode's size. 1975 * Update the bitmap inode's size.
1981 */ 1976 */
@@ -1986,10 +1981,8 @@ xfs_growfs_rt(
1986 /* 1981 /*
1987 * Get the summary inode into the transaction. 1982 * Get the summary inode into the transaction.
1988 */ 1983 */
1989 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rsumino, 0, 1984 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
1990 XFS_ILOCK_EXCL, &ip))) 1985 xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
1991 goto error_cancel;
1992 ASSERT(ip == mp->m_rsumip);
1993 /* 1986 /*
1994 * Update the summary inode's size. 1987 * Update the summary inode's size.
1995 */ 1988 */
@@ -2075,15 +2068,15 @@ xfs_rtallocate_extent(
2075 xfs_extlen_t prod, /* extent product factor */ 2068 xfs_extlen_t prod, /* extent product factor */
2076 xfs_rtblock_t *rtblock) /* out: start block allocated */ 2069 xfs_rtblock_t *rtblock) /* out: start block allocated */
2077{ 2070{
2071 xfs_mount_t *mp = tp->t_mountp;
2078 int error; /* error value */ 2072 int error; /* error value */
2079 xfs_inode_t *ip; /* inode for bitmap file */
2080 xfs_mount_t *mp; /* file system mount structure */
2081 xfs_rtblock_t r; /* result allocated block */ 2073 xfs_rtblock_t r; /* result allocated block */
2082 xfs_fsblock_t sb; /* summary file block number */ 2074 xfs_fsblock_t sb; /* summary file block number */
2083 xfs_buf_t *sumbp; /* summary file block buffer */ 2075 xfs_buf_t *sumbp; /* summary file block buffer */
2084 2076
2077 ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2085 ASSERT(minlen > 0 && minlen <= maxlen); 2078 ASSERT(minlen > 0 && minlen <= maxlen);
2086 mp = tp->t_mountp; 2079
2087 /* 2080 /*
2088 * If prod is set then figure out what to do to minlen and maxlen. 2081 * If prod is set then figure out what to do to minlen and maxlen.
2089 */ 2082 */
@@ -2099,12 +2092,7 @@ xfs_rtallocate_extent(
2099 return 0; 2092 return 0;
2100 } 2093 }
2101 } 2094 }
2102 /* 2095
2103 * Lock out other callers by grabbing the bitmap inode lock.
2104 */
2105 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0,
2106 XFS_ILOCK_EXCL, &ip)))
2107 return error;
2108 sumbp = NULL; 2096 sumbp = NULL;
2109 /* 2097 /*
2110 * Allocate by size, or near another block, or exactly at some block. 2098 * Allocate by size, or near another block, or exactly at some block.
@@ -2123,11 +2111,12 @@ xfs_rtallocate_extent(
2123 len, &sumbp, &sb, prod, &r); 2111 len, &sumbp, &sb, prod, &r);
2124 break; 2112 break;
2125 default: 2113 default:
2114 error = EIO;
2126 ASSERT(0); 2115 ASSERT(0);
2127 } 2116 }
2128 if (error) { 2117 if (error)
2129 return error; 2118 return error;
2130 } 2119
2131 /* 2120 /*
2132 * If it worked, update the superblock. 2121 * If it worked, update the superblock.
2133 */ 2122 */
@@ -2155,7 +2144,6 @@ xfs_rtfree_extent(
2155 xfs_extlen_t len) /* length of extent freed */ 2144 xfs_extlen_t len) /* length of extent freed */
2156{ 2145{
2157 int error; /* error value */ 2146 int error; /* error value */
2158 xfs_inode_t *ip; /* bitmap file inode */
2159 xfs_mount_t *mp; /* file system mount structure */ 2147 xfs_mount_t *mp; /* file system mount structure */
2160 xfs_fsblock_t sb; /* summary file block number */ 2148 xfs_fsblock_t sb; /* summary file block number */
2161 xfs_buf_t *sumbp; /* summary file block buffer */ 2149 xfs_buf_t *sumbp; /* summary file block buffer */
@@ -2164,9 +2152,9 @@ xfs_rtfree_extent(
2164 /* 2152 /*
2165 * Synchronize by locking the bitmap inode. 2153 * Synchronize by locking the bitmap inode.
2166 */ 2154 */
2167 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 2155 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
2168 XFS_ILOCK_EXCL, &ip))) 2156 xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
2169 return error; 2157
2170#if defined(__KERNEL__) && defined(DEBUG) 2158#if defined(__KERNEL__) && defined(DEBUG)
2171 /* 2159 /*
2172 * Check to see that this whole range is currently allocated. 2160 * Check to see that this whole range is currently allocated.
@@ -2199,10 +2187,10 @@ xfs_rtfree_extent(
2199 */ 2187 */
2200 if (tp->t_frextents_delta + mp->m_sb.sb_frextents == 2188 if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
2201 mp->m_sb.sb_rextents) { 2189 mp->m_sb.sb_rextents) {
2202 if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) 2190 if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
2203 ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; 2191 mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2204 *(__uint64_t *)&ip->i_d.di_atime = 0; 2192 *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
2205 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2193 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
2206 } 2194 }
2207 return 0; 2195 return 0;
2208} 2196}
@@ -2222,8 +2210,8 @@ xfs_rtmount_init(
2222 if (sbp->sb_rblocks == 0) 2210 if (sbp->sb_rblocks == 0)
2223 return 0; 2211 return 0;
2224 if (mp->m_rtdev_targp == NULL) { 2212 if (mp->m_rtdev_targp == NULL) {
2225 cmn_err(CE_WARN, 2213 xfs_warn(mp,
2226 "XFS: This filesystem has a realtime volume, use rtdev=device option"); 2214 "Filesystem has a realtime volume, use rtdev=device option");
2227 return XFS_ERROR(ENODEV); 2215 return XFS_ERROR(ENODEV);
2228 } 2216 }
2229 mp->m_rsumlevels = sbp->sb_rextslog + 1; 2217 mp->m_rsumlevels = sbp->sb_rextslog + 1;
@@ -2237,7 +2225,7 @@ xfs_rtmount_init(
2237 */ 2225 */
2238 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); 2226 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
2239 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) { 2227 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_rblocks) {
2240 cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu", 2228 xfs_warn(mp, "realtime mount -- %llu != %llu",
2241 (unsigned long long) XFS_BB_TO_FSB(mp, d), 2229 (unsigned long long) XFS_BB_TO_FSB(mp, d),
2242 (unsigned long long) mp->m_sb.sb_rblocks); 2230 (unsigned long long) mp->m_sb.sb_rblocks);
2243 return XFS_ERROR(EFBIG); 2231 return XFS_ERROR(EFBIG);
@@ -2246,7 +2234,7 @@ xfs_rtmount_init(
2246 d - XFS_FSB_TO_BB(mp, 1), 2234 d - XFS_FSB_TO_BB(mp, 1),
2247 XFS_FSB_TO_B(mp, 1), 0); 2235 XFS_FSB_TO_B(mp, 1), 0);
2248 if (!bp) { 2236 if (!bp) {
2249 cmn_err(CE_WARN, "XFS: realtime device size check failed"); 2237 xfs_warn(mp, "realtime device size check failed");
2250 return EIO; 2238 return EIO;
2251 } 2239 }
2252 xfs_buf_relse(bp); 2240 xfs_buf_relse(bp);
@@ -2306,20 +2294,16 @@ xfs_rtpick_extent(
2306 xfs_rtblock_t *pick) /* result rt extent */ 2294 xfs_rtblock_t *pick) /* result rt extent */
2307{ 2295{
2308 xfs_rtblock_t b; /* result block */ 2296 xfs_rtblock_t b; /* result block */
2309 int error; /* error return value */
2310 xfs_inode_t *ip; /* bitmap incore inode */
2311 int log2; /* log of sequence number */ 2297 int log2; /* log of sequence number */
2312 __uint64_t resid; /* residual after log removed */ 2298 __uint64_t resid; /* residual after log removed */
2313 __uint64_t seq; /* sequence number of file creation */ 2299 __uint64_t seq; /* sequence number of file creation */
2314 __uint64_t *seqp; /* pointer to seqno in inode */ 2300 __uint64_t *seqp; /* pointer to seqno in inode */
2315 2301
2316 if ((error = xfs_trans_iget(mp, tp, mp->m_sb.sb_rbmino, 0, 2302 ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
2317 XFS_ILOCK_EXCL, &ip))) 2303
2318 return error; 2304 seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime;
2319 ASSERT(ip == mp->m_rbmip); 2305 if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
2320 seqp = (__uint64_t *)&ip->i_d.di_atime; 2306 mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2321 if (!(ip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
2322 ip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
2323 *seqp = 0; 2307 *seqp = 0;
2324 } 2308 }
2325 seq = *seqp; 2309 seq = *seqp;
@@ -2335,7 +2319,7 @@ xfs_rtpick_extent(
2335 b = mp->m_sb.sb_rextents - len; 2319 b = mp->m_sb.sb_rextents - len;
2336 } 2320 }
2337 *seqp = seq + 1; 2321 *seqp = seq + 1;
2338 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 2322 xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
2339 *pick = b; 2323 *pick = b;
2340 return 0; 2324 return 0;
2341} 2325}
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index ff614c29b441..09e1f4f35e97 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -154,7 +154,7 @@ xfs_rtmount_init(
154 if (mp->m_sb.sb_rblocks == 0) 154 if (mp->m_sb.sb_rblocks == 0)
155 return 0; 155 return 0;
156 156
157 cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT"); 157 xfs_warn(mp, "Not built with CONFIG_XFS_RT");
158 return ENOSYS; 158 return ENOSYS;
159} 159}
160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS)) 160# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 56861d5daaef..d6d6fdfe9422 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -49,9 +49,9 @@ xfs_do_force_shutdown(
49 logerror = flags & SHUTDOWN_LOG_IO_ERROR; 49 logerror = flags & SHUTDOWN_LOG_IO_ERROR;
50 50
51 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 51 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
52 cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from " 52 xfs_notice(mp,
53 "line %d of file %s. Return address = 0x%p", 53 "%s(0x%x) called from line %d of file %s. Return address = 0x%p",
54 mp->m_fsname, flags, lnnum, fname, __return_address); 54 __func__, flags, lnnum, fname, __return_address);
55 } 55 }
56 /* 56 /*
57 * No need to duplicate efforts. 57 * No need to duplicate efforts.
@@ -69,30 +69,25 @@ xfs_do_force_shutdown(
69 return; 69 return;
70 70
71 if (flags & SHUTDOWN_CORRUPT_INCORE) { 71 if (flags & SHUTDOWN_CORRUPT_INCORE) {
72 xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp, 72 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
73 "Corruption of in-memory data detected. Shutting down filesystem: %s", 73 "Corruption of in-memory data detected. Shutting down filesystem");
74 mp->m_fsname); 74 if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
75 if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
76 xfs_stack_trace(); 75 xfs_stack_trace();
77 }
78 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 76 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
79 if (logerror) { 77 if (logerror) {
80 xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp, 78 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
81 "Log I/O Error Detected. Shutting down filesystem: %s", 79 "Log I/O Error Detected. Shutting down filesystem");
82 mp->m_fsname);
83 } else if (flags & SHUTDOWN_DEVICE_REQ) { 80 } else if (flags & SHUTDOWN_DEVICE_REQ) {
84 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, 81 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
85 "All device paths lost. Shutting down filesystem: %s", 82 "All device paths lost. Shutting down filesystem");
86 mp->m_fsname);
87 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) { 83 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
88 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp, 84 xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
89 "I/O Error Detected. Shutting down filesystem: %s", 85 "I/O Error Detected. Shutting down filesystem");
90 mp->m_fsname);
91 } 86 }
92 } 87 }
93 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { 88 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
94 cmn_err(CE_ALERT, "Please umount the filesystem, " 89 xfs_alert(mp,
95 "and rectify the problem(s)"); 90 "Please umount the filesystem and rectify the problem(s)");
96 } 91 }
97} 92}
98 93
@@ -106,10 +101,9 @@ xfs_ioerror_alert(
106 xfs_buf_t *bp, 101 xfs_buf_t *bp,
107 xfs_daddr_t blkno) 102 xfs_daddr_t blkno)
108{ 103{
109 cmn_err(CE_ALERT, 104 xfs_alert(mp,
110 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx" 105 "I/O error occurred: meta-data dev %s block 0x%llx"
111 " (\"%s\") error %d buf count %zd", 106 " (\"%s\") error %d buf count %zd",
112 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
113 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)), 107 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
114 (__uint64_t)blkno, func, 108 (__uint64_t)blkno, func,
115 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp)); 109 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
@@ -173,17 +167,9 @@ xfs_extlen_t
173xfs_get_extsz_hint( 167xfs_get_extsz_hint(
174 struct xfs_inode *ip) 168 struct xfs_inode *ip)
175{ 169{
176 xfs_extlen_t extsz; 170 if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
177 171 return ip->i_d.di_extsize;
178 if (unlikely(XFS_IS_REALTIME_INODE(ip))) { 172 if (XFS_IS_REALTIME_INODE(ip))
179 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) 173 return ip->i_mount->m_sb.sb_rextsize;
180 ? ip->i_d.di_extsize 174 return 0;
181 : ip->i_mount->m_sb.sb_rextsize;
182 ASSERT(extsz);
183 } else {
184 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
185 ? ip->i_d.di_extsize : 0;
186 }
187
188 return extsz;
189} 175}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c2042b736b81..06a9759b6352 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -469,8 +469,6 @@ void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
469void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *); 469void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
470void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); 470void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); 471void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
472int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
473 xfs_ino_t , uint, uint, struct xfs_inode **);
474void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); 472void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
475void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); 473void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
476void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); 474void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index c5bbbc45db91..12aff9584e29 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -563,7 +563,7 @@ xfs_trans_ail_delete_bulk(
563 563
564 spin_unlock(&ailp->xa_lock); 564 spin_unlock(&ailp->xa_lock);
565 if (!XFS_FORCED_SHUTDOWN(mp)) { 565 if (!XFS_FORCED_SHUTDOWN(mp)) {
566 xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp, 566 xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
567 "%s: attempting to delete a log item that is not in the AIL", 567 "%s: attempting to delete a log item that is not in the AIL",
568 __func__); 568 __func__);
569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 569 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c47918c302a5..3bea66132334 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -305,7 +305,7 @@ xfs_trans_read_buf(
305 if (xfs_error_target == target) { 305 if (xfs_error_target == target) {
306 if (((xfs_req_num++) % xfs_error_mod) == 0) { 306 if (((xfs_req_num++) % xfs_error_mod) == 0) {
307 xfs_buf_relse(bp); 307 xfs_buf_relse(bp);
308 cmn_err(CE_DEBUG, "Returning error!\n"); 308 xfs_debug(mp, "Returning error!");
309 return XFS_ERROR(EIO); 309 return XFS_ERROR(EIO);
310 } 310 }
311 } 311 }
@@ -403,7 +403,7 @@ xfs_trans_read_buf(
403 xfs_force_shutdown(tp->t_mountp, 403 xfs_force_shutdown(tp->t_mountp,
404 SHUTDOWN_META_IO_ERROR); 404 SHUTDOWN_META_IO_ERROR);
405 xfs_buf_relse(bp); 405 xfs_buf_relse(bp);
406 cmn_err(CE_DEBUG, "Returning trans error!\n"); 406 xfs_debug(mp, "Returning trans error!");
407 return XFS_ERROR(EIO); 407 return XFS_ERROR(EIO);
408 } 408 }
409 } 409 }
@@ -427,7 +427,7 @@ shutdown_abort:
427 */ 427 */
428#if defined(DEBUG) 428#if defined(DEBUG)
429 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp)) 429 if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
430 cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp); 430 xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
431#endif 431#endif
432 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) != 432 ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
433 (XBF_STALE|XBF_DELWRI)); 433 (XBF_STALE|XBF_DELWRI));
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index ccb34532768b..16084d8ea231 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -44,28 +44,6 @@ xfs_trans_inode_broot_debug(
44#endif 44#endif
45 45
46/* 46/*
47 * Get an inode and join it to the transaction.
48 */
49int
50xfs_trans_iget(
51 xfs_mount_t *mp,
52 xfs_trans_t *tp,
53 xfs_ino_t ino,
54 uint flags,
55 uint lock_flags,
56 xfs_inode_t **ipp)
57{
58 int error;
59
60 error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
61 if (!error && tp) {
62 xfs_trans_ijoin(tp, *ipp);
63 (*ipp)->i_itemp->ili_lock_flags = lock_flags;
64 }
65 return error;
66}
67
68/*
69 * Add a locked inode to the transaction. 47 * Add a locked inode to the transaction.
70 * 48 *
71 * The inode must be locked, and it cannot be associated with any transaction. 49 * The inode must be locked, and it cannot be associated with any transaction.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index d8e6f8cd6f0c..37d8146ee15b 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1189,9 +1189,8 @@ xfs_inactive(
1189 * inode might be lost for a long time or forever. 1189 * inode might be lost for a long time or forever.
1190 */ 1190 */
1191 if (!XFS_FORCED_SHUTDOWN(mp)) { 1191 if (!XFS_FORCED_SHUTDOWN(mp)) {
1192 cmn_err(CE_NOTE, 1192 xfs_notice(mp, "%s: xfs_ifree returned error %d",
1193 "xfs_inactive: xfs_ifree() returned an error = %d on %s", 1193 __func__, error);
1194 error, mp->m_fsname);
1195 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); 1194 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
1196 } 1195 }
1197 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); 1196 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
@@ -1208,12 +1207,12 @@ xfs_inactive(
1208 */ 1207 */
1209 error = xfs_bmap_finish(&tp, &free_list, &committed); 1208 error = xfs_bmap_finish(&tp, &free_list, &committed);
1210 if (error) 1209 if (error)
1211 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1210 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
1212 "xfs_bmap_finish() returned error %d", error); 1211 __func__, error);
1213 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1212 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1214 if (error) 1213 if (error)
1215 xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: " 1214 xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
1216 "xfs_trans_commit() returned error %d", error); 1215 __func__, error);
1217 } 1216 }
1218 1217
1219 /* 1218 /*
@@ -1310,7 +1309,7 @@ xfs_create(
1310 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, 1309 error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
1311 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); 1310 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
1312 if (error) 1311 if (error)
1313 goto std_return; 1312 return error;
1314 1313
1315 if (is_dir) { 1314 if (is_dir) {
1316 rdev = 0; 1315 rdev = 0;
@@ -1390,12 +1389,6 @@ xfs_create(
1390 } 1389 }
1391 1390
1392 /* 1391 /*
1393 * At this point, we've gotten a newly allocated inode.
1394 * It is locked (and joined to the transaction).
1395 */
1396 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1397
1398 /*
1399 * Now we join the directory inode to the transaction. We do not do it 1392 * Now we join the directory inode to the transaction. We do not do it
1400 * earlier because xfs_dir_ialloc might commit the previous transaction 1393 * earlier because xfs_dir_ialloc might commit the previous transaction
1401 * (and release all the locks). An error from here on will result in 1394 * (and release all the locks). An error from here on will result in
@@ -1440,22 +1433,13 @@ xfs_create(
1440 */ 1433 */
1441 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); 1434 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
1442 1435
1443 /*
1444 * xfs_trans_commit normally decrements the vnode ref count
1445 * when it unlocks the inode. Since we want to return the
1446 * vnode to the caller, we bump the vnode ref count now.
1447 */
1448 IHOLD(ip);
1449
1450 error = xfs_bmap_finish(&tp, &free_list, &committed); 1436 error = xfs_bmap_finish(&tp, &free_list, &committed);
1451 if (error) 1437 if (error)
1452 goto out_abort_rele; 1438 goto out_bmap_cancel;
1453 1439
1454 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 1440 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1455 if (error) { 1441 if (error)
1456 IRELE(ip); 1442 goto out_release_inode;
1457 goto out_dqrele;
1458 }
1459 1443
1460 xfs_qm_dqrele(udqp); 1444 xfs_qm_dqrele(udqp);
1461 xfs_qm_dqrele(gdqp); 1445 xfs_qm_dqrele(gdqp);
@@ -1469,27 +1453,21 @@ xfs_create(
1469 cancel_flags |= XFS_TRANS_ABORT; 1453 cancel_flags |= XFS_TRANS_ABORT;
1470 out_trans_cancel: 1454 out_trans_cancel:
1471 xfs_trans_cancel(tp, cancel_flags); 1455 xfs_trans_cancel(tp, cancel_flags);
1472 out_dqrele: 1456 out_release_inode:
1457 /*
1458 * Wait until after the current transaction is aborted to
1459 * release the inode. This prevents recursive transactions
1460 * and deadlocks from xfs_inactive.
1461 */
1462 if (ip)
1463 IRELE(ip);
1464
1473 xfs_qm_dqrele(udqp); 1465 xfs_qm_dqrele(udqp);
1474 xfs_qm_dqrele(gdqp); 1466 xfs_qm_dqrele(gdqp);
1475 1467
1476 if (unlock_dp_on_error) 1468 if (unlock_dp_on_error)
1477 xfs_iunlock(dp, XFS_ILOCK_EXCL); 1469 xfs_iunlock(dp, XFS_ILOCK_EXCL);
1478 std_return:
1479 return error; 1470 return error;
1480
1481 out_abort_rele:
1482 /*
1483 * Wait until after the current transaction is aborted to
1484 * release the inode. This prevents recursive transactions
1485 * and deadlocks from xfs_inactive.
1486 */
1487 xfs_bmap_cancel(&free_list);
1488 cancel_flags |= XFS_TRANS_ABORT;
1489 xfs_trans_cancel(tp, cancel_flags);
1490 IRELE(ip);
1491 unlock_dp_on_error = B_FALSE;
1492 goto out_dqrele;
1493} 1471}
1494 1472
1495#ifdef DEBUG 1473#ifdef DEBUG
@@ -2114,9 +2092,8 @@ xfs_symlink(
2114 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, 2092 XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
2115 &first_block, resblks, mval, &nmaps, 2093 &first_block, resblks, mval, &nmaps,
2116 &free_list); 2094 &free_list);
2117 if (error) { 2095 if (error)
2118 goto error1; 2096 goto error2;
2119 }
2120 2097
2121 if (resblks) 2098 if (resblks)
2122 resblks -= fs_blocks; 2099 resblks -= fs_blocks;
@@ -2148,7 +2125,7 @@ xfs_symlink(
2148 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino, 2125 error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
2149 &first_block, &free_list, resblks); 2126 &first_block, &free_list, resblks);
2150 if (error) 2127 if (error)
2151 goto error1; 2128 goto error2;
2152 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2129 xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2153 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2130 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2154 2131
@@ -2161,13 +2138,6 @@ xfs_symlink(
2161 xfs_trans_set_sync(tp); 2138 xfs_trans_set_sync(tp);
2162 } 2139 }
2163 2140
2164 /*
2165 * xfs_trans_commit normally decrements the vnode ref count
2166 * when it unlocks the inode. Since we want to return the
2167 * vnode to the caller, we bump the vnode ref count now.
2168 */
2169 IHOLD(ip);
2170
2171 error = xfs_bmap_finish(&tp, &free_list, &committed); 2141 error = xfs_bmap_finish(&tp, &free_list, &committed);
2172 if (error) { 2142 if (error) {
2173 goto error2; 2143 goto error2;